Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml +19 -0
- backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml +19 -0
- backup/cfgs/howto-anet_anet_clip_topk20_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml +19 -0
- backup/cfgs/howto-anet_anet_clip_topk20_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml +19 -0
- backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml +19 -0
- backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml +19 -0
- backup/cfgs/howto-anet_anet_clip_topk30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml +19 -0
- backup/cfgs/howto-anet_anet_clip_topk30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml +19 -0
- backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml +19 -0
- backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml +19 -0
- backup/cfgs/howto-anet_anet_clip_topk40_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml +19 -0
- backup/cfgs/howto-anet_anet_clip_topk40_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml +19 -0
- backup/cfgs_base/howto/base_howto-anet_anet_mixlm.yml +64 -0
- backup/cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml +61 -0
- backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_mixlm.yml +42 -0
- backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine_mixlm.yml +46 -0
- backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_mixlm.yml +42 -0
- backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine_mixlm.yml +46 -0
- backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_mixlm.yml +42 -0
- backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_mixlm.yml +46 -0
- backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_mixlm.yml +42 -0
- backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine_mixlm.yml +46 -0
- backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_mixlm.yml +42 -0
- backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine_mixlm.yml +46 -0
- backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_mixlm.yml +42 -0
- backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine_mixlm.yml +46 -0
- backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_mixlm.yml +42 -0
- backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_mixlm.yml +46 -0
- backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_mixlm_v0.yml +46 -0
- backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_mixlm.yml +42 -0
- backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_puyu.yml +1 -1
- backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_mixlm.yml +46 -0
- backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml +1 -1
- backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml +1 -1
- backup/cfgs_ft_gt/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml +19 -0
- backup/cfgs_ft_gt/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_rand2.yml +19 -0
- backup/cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml +19 -0
- backup/cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_rand2.yml +19 -0
- backup/change_config_add.py +8 -6
- backup/misc/__pycache__/utils.cpython-38.pyc +0 -0
- backup/misc/utils.py +7 -2
- backup/opts.py +1 -0
- backup/pdvc/__pycache__/pdvc.cpython-38.pyc +0 -0
- backup/pdvc/__pycache__/video_segmentation.cpython-38.pyc +0 -0
- backup/pdvc/pdvc.py +2 -0
- backup/pdvc/video_segmentation.py +59 -0
- backup/train.py +102 -11
- backup/train_fewshot.py +1 -1
- backup/train_ft2_gt.py +6 -5
- backup/train_pre_ft_gt.py +2 -2
backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: ''
|
| 2 |
+
base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
pseudo_box_aug_num: 8
|
| 6 |
+
pseudo_box_aug_ratio: 0.02
|
| 7 |
+
pseudo_box_aug_mode: random_range
|
| 8 |
+
refine_pseudo_box: 1
|
| 9 |
+
refine_pseudo_stage_num: 2
|
| 10 |
+
merge_k_boxes: 3
|
| 11 |
+
pseudo_box_type: similarity_op_order_v2
|
| 12 |
+
top_frames: 20
|
| 13 |
+
width_ratio: 1
|
| 14 |
+
iteration: 3
|
| 15 |
+
width_th: 2
|
| 16 |
+
use_query_box_for_refine: 0
|
| 17 |
+
gt_proposal_sample_num: 20
|
| 18 |
+
mil_loss_coef: 0
|
| 19 |
+
merge_criterion: ins_cap_topk
|
backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: ''
|
| 2 |
+
base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
pseudo_box_aug_num: 8
|
| 6 |
+
pseudo_box_aug_ratio: 0.02
|
| 7 |
+
pseudo_box_aug_mode: random_range
|
| 8 |
+
refine_pseudo_box: 1
|
| 9 |
+
refine_pseudo_stage_num: 2
|
| 10 |
+
merge_k_boxes: 3
|
| 11 |
+
pseudo_box_type: similarity_op_order_v2
|
| 12 |
+
top_frames: 20
|
| 13 |
+
width_ratio: 1
|
| 14 |
+
iteration: 3
|
| 15 |
+
width_th: 2
|
| 16 |
+
use_query_box_for_refine: 0
|
| 17 |
+
gt_proposal_sample_num: 20
|
| 18 |
+
mil_loss_coef: 0
|
| 19 |
+
merge_criterion: ins_cap_topk
|
backup/cfgs/howto-anet_anet_clip_topk20_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: ''
|
| 2 |
+
base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
pseudo_box_aug_num: 8
|
| 6 |
+
pseudo_box_aug_ratio: 0.02
|
| 7 |
+
pseudo_box_aug_mode: random_range
|
| 8 |
+
refine_pseudo_box: 1
|
| 9 |
+
refine_pseudo_stage_num: 2
|
| 10 |
+
merge_k_boxes: 3
|
| 11 |
+
pseudo_box_type: similarity_op_order_v2
|
| 12 |
+
top_frames: 20
|
| 13 |
+
width_ratio: 2
|
| 14 |
+
iteration: 3
|
| 15 |
+
width_th: 2
|
| 16 |
+
use_query_box_for_refine: 0
|
| 17 |
+
gt_proposal_sample_num: 20
|
| 18 |
+
mil_loss_coef: 0
|
| 19 |
+
merge_criterion: ins_cap_topk
|
backup/cfgs/howto-anet_anet_clip_topk20_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: ''
|
| 2 |
+
base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
pseudo_box_aug_num: 8
|
| 6 |
+
pseudo_box_aug_ratio: 0.02
|
| 7 |
+
pseudo_box_aug_mode: random_range
|
| 8 |
+
refine_pseudo_box: 1
|
| 9 |
+
refine_pseudo_stage_num: 2
|
| 10 |
+
merge_k_boxes: 3
|
| 11 |
+
pseudo_box_type: similarity_op_order_v2
|
| 12 |
+
top_frames: 20
|
| 13 |
+
width_ratio: 2
|
| 14 |
+
iteration: 3
|
| 15 |
+
width_th: 2
|
| 16 |
+
use_query_box_for_refine: 0
|
| 17 |
+
gt_proposal_sample_num: 20
|
| 18 |
+
mil_loss_coef: 0
|
| 19 |
+
merge_criterion: ins_cap_topk
|
backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: ''
|
| 2 |
+
base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
pseudo_box_aug_num: 8
|
| 6 |
+
pseudo_box_aug_ratio: 0.02
|
| 7 |
+
pseudo_box_aug_mode: random_range
|
| 8 |
+
refine_pseudo_box: 1
|
| 9 |
+
refine_pseudo_stage_num: 2
|
| 10 |
+
merge_k_boxes: 3
|
| 11 |
+
pseudo_box_type: similarity_op_order_v2
|
| 12 |
+
top_frames: 30
|
| 13 |
+
width_ratio: 1
|
| 14 |
+
iteration: 3
|
| 15 |
+
width_th: 2
|
| 16 |
+
use_query_box_for_refine: 0
|
| 17 |
+
gt_proposal_sample_num: 20
|
| 18 |
+
mil_loss_coef: 0
|
| 19 |
+
merge_criterion: ins_cap_topk
|
backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: ''
|
| 2 |
+
base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
pseudo_box_aug_num: 8
|
| 6 |
+
pseudo_box_aug_ratio: 0.02
|
| 7 |
+
pseudo_box_aug_mode: random_range
|
| 8 |
+
refine_pseudo_box: 1
|
| 9 |
+
refine_pseudo_stage_num: 2
|
| 10 |
+
merge_k_boxes: 3
|
| 11 |
+
pseudo_box_type: similarity_op_order_v2
|
| 12 |
+
top_frames: 30
|
| 13 |
+
width_ratio: 1
|
| 14 |
+
iteration: 3
|
| 15 |
+
width_th: 2
|
| 16 |
+
use_query_box_for_refine: 0
|
| 17 |
+
gt_proposal_sample_num: 20
|
| 18 |
+
mil_loss_coef: 0
|
| 19 |
+
merge_criterion: ins_cap_topk
|
backup/cfgs/howto-anet_anet_clip_topk30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: ''
|
| 2 |
+
base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
pseudo_box_aug_num: 8
|
| 6 |
+
pseudo_box_aug_ratio: 0.02
|
| 7 |
+
pseudo_box_aug_mode: random_range
|
| 8 |
+
refine_pseudo_box: 1
|
| 9 |
+
refine_pseudo_stage_num: 2
|
| 10 |
+
merge_k_boxes: 3
|
| 11 |
+
pseudo_box_type: similarity_op_order_v2
|
| 12 |
+
top_frames: 30
|
| 13 |
+
width_ratio: 2
|
| 14 |
+
iteration: 3
|
| 15 |
+
width_th: 2
|
| 16 |
+
use_query_box_for_refine: 0
|
| 17 |
+
gt_proposal_sample_num: 20
|
| 18 |
+
mil_loss_coef: 0
|
| 19 |
+
merge_criterion: ins_cap_topk
|
backup/cfgs/howto-anet_anet_clip_topk30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: ''
|
| 2 |
+
base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
pseudo_box_aug_num: 8
|
| 6 |
+
pseudo_box_aug_ratio: 0.02
|
| 7 |
+
pseudo_box_aug_mode: random_range
|
| 8 |
+
refine_pseudo_box: 1
|
| 9 |
+
refine_pseudo_stage_num: 2
|
| 10 |
+
merge_k_boxes: 3
|
| 11 |
+
pseudo_box_type: similarity_op_order_v2
|
| 12 |
+
top_frames: 30
|
| 13 |
+
width_ratio: 2
|
| 14 |
+
iteration: 3
|
| 15 |
+
width_th: 2
|
| 16 |
+
use_query_box_for_refine: 0
|
| 17 |
+
gt_proposal_sample_num: 20
|
| 18 |
+
mil_loss_coef: 0
|
| 19 |
+
merge_criterion: ins_cap_topk
|
backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: ''
|
| 2 |
+
base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
pseudo_box_aug_num: 8
|
| 6 |
+
pseudo_box_aug_ratio: 0.02
|
| 7 |
+
pseudo_box_aug_mode: random_range
|
| 8 |
+
refine_pseudo_box: 1
|
| 9 |
+
refine_pseudo_stage_num: 2
|
| 10 |
+
merge_k_boxes: 3
|
| 11 |
+
pseudo_box_type: similarity_op_order_v2
|
| 12 |
+
top_frames: 40
|
| 13 |
+
width_ratio: 1
|
| 14 |
+
iteration: 3
|
| 15 |
+
width_th: 2
|
| 16 |
+
use_query_box_for_refine: 0
|
| 17 |
+
gt_proposal_sample_num: 20
|
| 18 |
+
mil_loss_coef: 0
|
| 19 |
+
merge_criterion: ins_cap_topk
|
backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: ''
|
| 2 |
+
base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
pseudo_box_aug_num: 8
|
| 6 |
+
pseudo_box_aug_ratio: 0.02
|
| 7 |
+
pseudo_box_aug_mode: random_range
|
| 8 |
+
refine_pseudo_box: 1
|
| 9 |
+
refine_pseudo_stage_num: 2
|
| 10 |
+
merge_k_boxes: 3
|
| 11 |
+
pseudo_box_type: similarity_op_order_v2
|
| 12 |
+
top_frames: 40
|
| 13 |
+
width_ratio: 1
|
| 14 |
+
iteration: 3
|
| 15 |
+
width_th: 2
|
| 16 |
+
use_query_box_for_refine: 0
|
| 17 |
+
gt_proposal_sample_num: 20
|
| 18 |
+
mil_loss_coef: 0
|
| 19 |
+
merge_criterion: ins_cap_topk
|
backup/cfgs/howto-anet_anet_clip_topk40_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: ''
|
| 2 |
+
base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
pseudo_box_aug_num: 8
|
| 6 |
+
pseudo_box_aug_ratio: 0.02
|
| 7 |
+
pseudo_box_aug_mode: random_range
|
| 8 |
+
refine_pseudo_box: 1
|
| 9 |
+
refine_pseudo_stage_num: 2
|
| 10 |
+
merge_k_boxes: 3
|
| 11 |
+
pseudo_box_type: similarity_op_order_v2
|
| 12 |
+
top_frames: 40
|
| 13 |
+
width_ratio: 2
|
| 14 |
+
iteration: 3
|
| 15 |
+
width_th: 2
|
| 16 |
+
use_query_box_for_refine: 0
|
| 17 |
+
gt_proposal_sample_num: 20
|
| 18 |
+
mil_loss_coef: 0
|
| 19 |
+
merge_criterion: ins_cap_topk
|
backup/cfgs/howto-anet_anet_clip_topk40_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: ''
|
| 2 |
+
base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
pseudo_box_aug_num: 8
|
| 6 |
+
pseudo_box_aug_ratio: 0.02
|
| 7 |
+
pseudo_box_aug_mode: random_range
|
| 8 |
+
refine_pseudo_box: 1
|
| 9 |
+
refine_pseudo_stage_num: 2
|
| 10 |
+
merge_k_boxes: 3
|
| 11 |
+
pseudo_box_type: similarity_op_order_v2
|
| 12 |
+
top_frames: 40
|
| 13 |
+
width_ratio: 2
|
| 14 |
+
iteration: 3
|
| 15 |
+
width_th: 2
|
| 16 |
+
use_query_box_for_refine: 0
|
| 17 |
+
gt_proposal_sample_num: 20
|
| 18 |
+
mil_loss_coef: 0
|
| 19 |
+
merge_criterion: ins_cap_topk
|
backup/cfgs_base/howto/base_howto-anet_anet_mixlm.yml
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: anet
|
| 2 |
+
|
| 3 |
+
visual_feature_type: c3d
|
| 4 |
+
visual_feature_folder: 'data/anet/features/c3d'
|
| 5 |
+
feature_dim: 500
|
| 6 |
+
invalid_video_json: []
|
| 7 |
+
train_proposal_file: data/generated_proposals/dbg_trainval_top100.json
|
| 8 |
+
eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json
|
| 9 |
+
gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json']
|
| 10 |
+
gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json']
|
| 11 |
+
train_caption_file: ['data/howto/captiondata/howto100m_train_mixlm.json', 'data/anet/captiondata/train_modified.json']
|
| 12 |
+
val_caption_file: 'data/anet/captiondata/val_1.json'
|
| 13 |
+
|
| 14 |
+
max_caption_len: 50
|
| 15 |
+
|
| 16 |
+
dict_file: data/howto/vocabulary_howto_rate2_mixlm_anet.json
|
| 17 |
+
vocab_size: 18884
|
| 18 |
+
# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json
|
| 19 |
+
# vocab_size: 8531
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
train_proposal_type: gt
|
| 23 |
+
train_proposal_sample_num: 30
|
| 24 |
+
sample_method: nearest
|
| 25 |
+
|
| 26 |
+
epoch: 10
|
| 27 |
+
batch_size: 1
|
| 28 |
+
lr: 0.00005
|
| 29 |
+
learning_rate_decay_start: 8
|
| 30 |
+
learning_rate_decay_every: 3
|
| 31 |
+
learning_rate_decay_rate: 0.5
|
| 32 |
+
weight_decay: 0.0001
|
| 33 |
+
save_all_checkpoint: 0
|
| 34 |
+
|
| 35 |
+
num_queries: 100
|
| 36 |
+
dec_layers: 2
|
| 37 |
+
enc_layers: 2
|
| 38 |
+
transformer_ff_dim: 512
|
| 39 |
+
transformer_dropout_prob: 0.1
|
| 40 |
+
frame_embedding_num: 100
|
| 41 |
+
caption_decoder_type: light
|
| 42 |
+
att_hid_size: 0
|
| 43 |
+
|
| 44 |
+
with_box_refine: 1
|
| 45 |
+
|
| 46 |
+
fix_xcw: 1
|
| 47 |
+
set_cost_caption: 0
|
| 48 |
+
set_cost_giou: 4
|
| 49 |
+
set_cost_bbox: 0
|
| 50 |
+
set_cost_class: 2
|
| 51 |
+
self_iou_loss_coef: 0
|
| 52 |
+
#cost_alpha: 0.5
|
| 53 |
+
#cost_gamma: 1
|
| 54 |
+
#focal_alpha: 0.5
|
| 55 |
+
#focal_gamma: 1
|
| 56 |
+
caption_loss_coef: 2
|
| 57 |
+
giou_loss_coef: 4
|
| 58 |
+
bbox_loss_coef: 0
|
| 59 |
+
cls_loss_coef: 2
|
| 60 |
+
count_loss_coef: 0.5
|
| 61 |
+
max_eseq_length: 10
|
| 62 |
+
lloss_cross_entropy: 0
|
| 63 |
+
lloss_focal_loss: 0
|
| 64 |
+
lloss_gau_mask: 1
|
backup/cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: yc2_tsn_pdvcl
|
| 2 |
+
|
| 3 |
+
visual_feature_type: ['resnet', 'bn']
|
| 4 |
+
visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/']
|
| 5 |
+
feature_dim: 3072
|
| 6 |
+
invalid_video_json: []
|
| 7 |
+
train_caption_file: ['data/howto/captiondata/howto100m_train_mixlm.json', 'data/yc2/captiondata/yc2_train.json']
|
| 8 |
+
val_caption_file: 'data/yc2/captiondata/yc2_val.json'
|
| 9 |
+
gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json']
|
| 10 |
+
gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json']
|
| 11 |
+
max_caption_len: 50
|
| 12 |
+
|
| 13 |
+
dict_file: data/howto/vocabulary_howto_rate2_mixlm_yc2.json
|
| 14 |
+
vocab_size: 17447
|
| 15 |
+
# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json
|
| 16 |
+
# vocab_size: 8531
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
train_proposal_type: gt
|
| 20 |
+
train_proposal_sample_num: 30
|
| 21 |
+
sample_method: nearest
|
| 22 |
+
|
| 23 |
+
epoch: 10
|
| 24 |
+
batch_size: 1
|
| 25 |
+
lr: 0.00005
|
| 26 |
+
learning_rate_decay_start: 8
|
| 27 |
+
learning_rate_decay_every: 3
|
| 28 |
+
learning_rate_decay_rate: 0.5
|
| 29 |
+
weight_decay: 0.0001
|
| 30 |
+
save_all_checkpoint: 0
|
| 31 |
+
|
| 32 |
+
num_queries: 100
|
| 33 |
+
dec_layers: 2
|
| 34 |
+
enc_layers: 2
|
| 35 |
+
transformer_ff_dim: 512
|
| 36 |
+
transformer_dropout_prob: 0.1
|
| 37 |
+
frame_embedding_num: 200
|
| 38 |
+
caption_decoder_type: light
|
| 39 |
+
att_hid_size: 0
|
| 40 |
+
|
| 41 |
+
with_box_refine: 1
|
| 42 |
+
|
| 43 |
+
fix_xcw: 1
|
| 44 |
+
set_cost_caption: 0
|
| 45 |
+
set_cost_giou: 4
|
| 46 |
+
set_cost_bbox: 0
|
| 47 |
+
set_cost_class: 2
|
| 48 |
+
self_iou_loss_coef: 0
|
| 49 |
+
#cost_alpha: 0.5
|
| 50 |
+
#cost_gamma: 1
|
| 51 |
+
#focal_alpha: 0.5
|
| 52 |
+
#focal_gamma: 1
|
| 53 |
+
caption_loss_coef: 2
|
| 54 |
+
giou_loss_coef: 4
|
| 55 |
+
bbox_loss_coef: 0
|
| 56 |
+
cls_loss_coef: 2
|
| 57 |
+
count_loss_coef: 0.5
|
| 58 |
+
max_eseq_length: 20
|
| 59 |
+
lloss_cross_entropy: 0
|
| 60 |
+
lloss_focal_loss: 0
|
| 61 |
+
lloss_gau_mask: 1
|
backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_mixlm.yml
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: refine_aug(5,0.3)_top3_1stage
|
| 2 |
+
base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml
|
| 3 |
+
|
| 4 |
+
visual_feature_type: ['CLIP']
|
| 5 |
+
visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/']
|
| 6 |
+
text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/']
|
| 7 |
+
visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/']
|
| 8 |
+
text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/']
|
| 9 |
+
feature_dim: 768
|
| 10 |
+
hidden_dim: 512
|
| 11 |
+
|
| 12 |
+
use_pseudo_box: 1
|
| 13 |
+
pseudo_box_aug: 0
|
| 14 |
+
pseudo_box_aug_num: 5
|
| 15 |
+
pseudo_box_aug_ratio: 0.3
|
| 16 |
+
refine_pseudo_box: 0
|
| 17 |
+
refine_pseudo_stage_num: 1
|
| 18 |
+
merge_k_boxes: 3
|
| 19 |
+
pseudo_box_type: similarity_op_order_v2
|
| 20 |
+
iteration: 3
|
| 21 |
+
width_th: 2
|
| 22 |
+
use_query_box_for_refine: 0
|
| 23 |
+
gt_proposal_sample_num: 12
|
| 24 |
+
|
| 25 |
+
use_anchor: 1
|
| 26 |
+
pretrained_language_model: CLIP
|
| 27 |
+
disable_contrastive_projection: 1
|
| 28 |
+
|
| 29 |
+
caption_decoder_type: standard
|
| 30 |
+
cap_nheads: 1
|
| 31 |
+
cap_dec_n_points: 4
|
| 32 |
+
cap_num_feature_levels: 4
|
| 33 |
+
soft_attention: 1
|
| 34 |
+
att_hid_size: 512
|
| 35 |
+
|
| 36 |
+
num_queries: 100
|
| 37 |
+
|
| 38 |
+
ec_alpha: 1.0
|
| 39 |
+
|
| 40 |
+
self_iou_loss_coef: 0.0
|
| 41 |
+
ref_rank_loss_coef: 0.0
|
| 42 |
+
contrastive_loss_start_coef: 0.0
|
backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine_mixlm.yml
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: refine_aug(5,0.3)_top3_1stage
|
| 2 |
+
base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml
|
| 3 |
+
|
| 4 |
+
visual_feature_type: ['CLIP']
|
| 5 |
+
visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/']
|
| 6 |
+
text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/']
|
| 7 |
+
visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/']
|
| 8 |
+
text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/']
|
| 9 |
+
feature_dim: 768
|
| 10 |
+
hidden_dim: 512
|
| 11 |
+
|
| 12 |
+
use_pseudo_box: 1
|
| 13 |
+
pseudo_box_aug: 1
|
| 14 |
+
pseudo_box_aug_num: 5
|
| 15 |
+
pseudo_box_aug_ratio: 0.3
|
| 16 |
+
refine_pseudo_box: 1
|
| 17 |
+
refine_pseudo_stage_num: 1
|
| 18 |
+
merge_k_boxes: 3
|
| 19 |
+
pseudo_box_type: similarity_op_order_v2
|
| 20 |
+
iteration: 3
|
| 21 |
+
width_th: 2
|
| 22 |
+
statistic_mode: mode
|
| 23 |
+
width_ratio: 1
|
| 24 |
+
window_size: 2
|
| 25 |
+
top_frames: 35
|
| 26 |
+
use_query_box_for_refine: 0
|
| 27 |
+
gt_proposal_sample_num: 12
|
| 28 |
+
|
| 29 |
+
use_anchor: 1
|
| 30 |
+
pretrained_language_model: CLIP
|
| 31 |
+
disable_contrastive_projection: 1
|
| 32 |
+
|
| 33 |
+
caption_decoder_type: standard
|
| 34 |
+
cap_nheads: 1
|
| 35 |
+
cap_dec_n_points: 4
|
| 36 |
+
cap_num_feature_levels: 4
|
| 37 |
+
soft_attention: 1
|
| 38 |
+
att_hid_size: 512
|
| 39 |
+
|
| 40 |
+
num_queries: 100
|
| 41 |
+
|
| 42 |
+
ec_alpha: 1.0
|
| 43 |
+
|
| 44 |
+
self_iou_loss_coef: 0.0
|
| 45 |
+
ref_rank_loss_coef: 0.0
|
| 46 |
+
contrastive_loss_start_coef: 0.0
|
backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_mixlm.yml
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: refine_aug(5,0.3)_top3_1stage
|
| 2 |
+
base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml
|
| 3 |
+
|
| 4 |
+
visual_feature_type: ['UniVL']
|
| 5 |
+
visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/']
|
| 6 |
+
text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/']
|
| 7 |
+
visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/']
|
| 8 |
+
text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/']
|
| 9 |
+
feature_dim: 768
|
| 10 |
+
hidden_dim: 512
|
| 11 |
+
|
| 12 |
+
use_pseudo_box: 1
|
| 13 |
+
pseudo_box_aug: 0
|
| 14 |
+
pseudo_box_aug_num: 5
|
| 15 |
+
pseudo_box_aug_ratio: 0.3
|
| 16 |
+
refine_pseudo_box: 0
|
| 17 |
+
refine_pseudo_stage_num: 1
|
| 18 |
+
merge_k_boxes: 3
|
| 19 |
+
pseudo_box_type: similarity_op_order_v2
|
| 20 |
+
iteration: 3
|
| 21 |
+
width_th: 2
|
| 22 |
+
use_query_box_for_refine: 0
|
| 23 |
+
gt_proposal_sample_num: 12
|
| 24 |
+
|
| 25 |
+
use_anchor: 1
|
| 26 |
+
pretrained_language_model: UniVL
|
| 27 |
+
disable_contrastive_projection: 1
|
| 28 |
+
|
| 29 |
+
caption_decoder_type: standard
|
| 30 |
+
cap_nheads: 1
|
| 31 |
+
cap_dec_n_points: 4
|
| 32 |
+
cap_num_feature_levels: 4
|
| 33 |
+
soft_attention: 1
|
| 34 |
+
att_hid_size: 512
|
| 35 |
+
|
| 36 |
+
num_queries: 100
|
| 37 |
+
|
| 38 |
+
ec_alpha: 1.0
|
| 39 |
+
|
| 40 |
+
self_iou_loss_coef: 0.0
|
| 41 |
+
ref_rank_loss_coef: 0.0
|
| 42 |
+
contrastive_loss_start_coef: 0.0
|
backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine_mixlm.yml
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: refine_aug(5,0.3)_top3_1stage
|
| 2 |
+
base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml
|
| 3 |
+
|
| 4 |
+
visual_feature_type: ['UniVL']
|
| 5 |
+
visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/']
|
| 6 |
+
text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/']
|
| 7 |
+
visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/']
|
| 8 |
+
text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/']
|
| 9 |
+
feature_dim: 768
|
| 10 |
+
hidden_dim: 512
|
| 11 |
+
|
| 12 |
+
use_pseudo_box: 1
|
| 13 |
+
pseudo_box_aug: 1
|
| 14 |
+
pseudo_box_aug_num: 5
|
| 15 |
+
pseudo_box_aug_ratio: 0.3
|
| 16 |
+
refine_pseudo_box: 1
|
| 17 |
+
refine_pseudo_stage_num: 1
|
| 18 |
+
merge_k_boxes: 3
|
| 19 |
+
pseudo_box_type: similarity_op_order_v2
|
| 20 |
+
iteration: 3
|
| 21 |
+
width_th: 2
|
| 22 |
+
statistic_mode: mode
|
| 23 |
+
width_ratio: 1
|
| 24 |
+
window_size: 2
|
| 25 |
+
top_frames: 35
|
| 26 |
+
use_query_box_for_refine: 0
|
| 27 |
+
gt_proposal_sample_num: 12
|
| 28 |
+
|
| 29 |
+
use_anchor: 1
|
| 30 |
+
pretrained_language_model: UniVL
|
| 31 |
+
disable_contrastive_projection: 1
|
| 32 |
+
|
| 33 |
+
caption_decoder_type: standard
|
| 34 |
+
cap_nheads: 1
|
| 35 |
+
cap_dec_n_points: 4
|
| 36 |
+
cap_num_feature_levels: 4
|
| 37 |
+
soft_attention: 1
|
| 38 |
+
att_hid_size: 512
|
| 39 |
+
|
| 40 |
+
num_queries: 100
|
| 41 |
+
|
| 42 |
+
ec_alpha: 1.0
|
| 43 |
+
|
| 44 |
+
self_iou_loss_coef: 0.0
|
| 45 |
+
ref_rank_loss_coef: 0.0
|
| 46 |
+
contrastive_loss_start_coef: 0.0
|
backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_mixlm.yml
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: refine_aug(5,0.3)_top3_1stage
|
| 2 |
+
base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml
|
| 3 |
+
|
| 4 |
+
visual_feature_type: ['CLIP']
|
| 5 |
+
visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/']
|
| 6 |
+
text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/']
|
| 7 |
+
visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/']
|
| 8 |
+
text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/']
|
| 9 |
+
feature_dim: 768
|
| 10 |
+
hidden_dim: 512
|
| 11 |
+
|
| 12 |
+
use_pseudo_box: 1
|
| 13 |
+
pseudo_box_aug: 0
|
| 14 |
+
pseudo_box_aug_num: 5
|
| 15 |
+
pseudo_box_aug_ratio: 0.3
|
| 16 |
+
refine_pseudo_box: 0
|
| 17 |
+
refine_pseudo_stage_num: 1
|
| 18 |
+
merge_k_boxes: 3
|
| 19 |
+
pseudo_box_type: similarity_op_order_v2
|
| 20 |
+
iteration: 3
|
| 21 |
+
width_th: 2
|
| 22 |
+
use_query_box_for_refine: 0
|
| 23 |
+
gt_proposal_sample_num: 12
|
| 24 |
+
|
| 25 |
+
use_anchor: 0
|
| 26 |
+
pretrained_language_model: CLIP
|
| 27 |
+
disable_contrastive_projection: 1
|
| 28 |
+
|
| 29 |
+
caption_decoder_type: standard
|
| 30 |
+
cap_nheads: 1
|
| 31 |
+
cap_dec_n_points: 4
|
| 32 |
+
cap_num_feature_levels: 4
|
| 33 |
+
soft_attention: 1
|
| 34 |
+
att_hid_size: 512
|
| 35 |
+
|
| 36 |
+
num_queries: 100
|
| 37 |
+
|
| 38 |
+
ec_alpha: 1.0
|
| 39 |
+
|
| 40 |
+
self_iou_loss_coef: 0.0
|
| 41 |
+
ref_rank_loss_coef: 0.0
|
| 42 |
+
contrastive_loss_start_coef: 0.0
|
backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_mixlm.yml
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: refine_aug(5,0.3)_top3_1stage
|
| 2 |
+
base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml
|
| 3 |
+
|
| 4 |
+
visual_feature_type: ['CLIP']
|
| 5 |
+
visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/']
|
| 6 |
+
text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/']
|
| 7 |
+
visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/']
|
| 8 |
+
text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/']
|
| 9 |
+
feature_dim: 768
|
| 10 |
+
hidden_dim: 512
|
| 11 |
+
|
| 12 |
+
use_pseudo_box: 1
|
| 13 |
+
pseudo_box_aug: 1
|
| 14 |
+
pseudo_box_aug_num: 5
|
| 15 |
+
pseudo_box_aug_ratio: 0.3
|
| 16 |
+
refine_pseudo_box: 1
|
| 17 |
+
refine_pseudo_stage_num: 1
|
| 18 |
+
merge_k_boxes: 3
|
| 19 |
+
pseudo_box_type: similarity_op_order_v2
|
| 20 |
+
iteration: 3
|
| 21 |
+
width_th: 2
|
| 22 |
+
statistic_mode: mode
|
| 23 |
+
width_ratio: 1
|
| 24 |
+
window_size: 2
|
| 25 |
+
top_frames: 35
|
| 26 |
+
use_query_box_for_refine: 0
|
| 27 |
+
gt_proposal_sample_num: 12
|
| 28 |
+
|
| 29 |
+
use_anchor: 0
|
| 30 |
+
pretrained_language_model: CLIP
|
| 31 |
+
disable_contrastive_projection: 1
|
| 32 |
+
|
| 33 |
+
caption_decoder_type: standard
|
| 34 |
+
cap_nheads: 1
|
| 35 |
+
cap_dec_n_points: 4
|
| 36 |
+
cap_num_feature_levels: 4
|
| 37 |
+
soft_attention: 1
|
| 38 |
+
att_hid_size: 512
|
| 39 |
+
|
| 40 |
+
num_queries: 100
|
| 41 |
+
|
| 42 |
+
ec_alpha: 1.0
|
| 43 |
+
|
| 44 |
+
self_iou_loss_coef: 0.0
|
| 45 |
+
ref_rank_loss_coef: 0.0
|
| 46 |
+
contrastive_loss_start_coef: 0.0
|
backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_mixlm.yml
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: refine_aug(5,0.3)_top3_1stage
|
| 2 |
+
base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml
|
| 3 |
+
|
| 4 |
+
visual_feature_type: ['UniVL']
|
| 5 |
+
visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/']
|
| 6 |
+
text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/']
|
| 7 |
+
visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/']
|
| 8 |
+
text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/']
|
| 9 |
+
feature_dim: 768
|
| 10 |
+
hidden_dim: 512
|
| 11 |
+
|
| 12 |
+
use_pseudo_box: 1
|
| 13 |
+
pseudo_box_aug: 0
|
| 14 |
+
pseudo_box_aug_num: 5
|
| 15 |
+
pseudo_box_aug_ratio: 0.3
|
| 16 |
+
refine_pseudo_box: 0
|
| 17 |
+
refine_pseudo_stage_num: 1
|
| 18 |
+
merge_k_boxes: 3
|
| 19 |
+
pseudo_box_type: similarity_op_order_v2
|
| 20 |
+
iteration: 3
|
| 21 |
+
width_th: 2
|
| 22 |
+
use_query_box_for_refine: 0
|
| 23 |
+
gt_proposal_sample_num: 12
|
| 24 |
+
|
| 25 |
+
use_anchor: 0
|
| 26 |
+
pretrained_language_model: UniVL
|
| 27 |
+
disable_contrastive_projection: 1
|
| 28 |
+
|
| 29 |
+
caption_decoder_type: standard
|
| 30 |
+
cap_nheads: 1
|
| 31 |
+
cap_dec_n_points: 4
|
| 32 |
+
cap_num_feature_levels: 4
|
| 33 |
+
soft_attention: 1
|
| 34 |
+
att_hid_size: 512
|
| 35 |
+
|
| 36 |
+
num_queries: 100
|
| 37 |
+
|
| 38 |
+
ec_alpha: 1.0
|
| 39 |
+
|
| 40 |
+
self_iou_loss_coef: 0.0
|
| 41 |
+
ref_rank_loss_coef: 0.0
|
| 42 |
+
contrastive_loss_start_coef: 0.0
|
backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine_mixlm.yml
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: refine_aug(5,0.3)_top3_1stage
|
| 2 |
+
base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml
|
| 3 |
+
|
| 4 |
+
visual_feature_type: ['UniVL']
|
| 5 |
+
visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/']
|
| 6 |
+
text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/']
|
| 7 |
+
visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/']
|
| 8 |
+
text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/']
|
| 9 |
+
feature_dim: 768
|
| 10 |
+
hidden_dim: 512
|
| 11 |
+
|
| 12 |
+
use_pseudo_box: 1
|
| 13 |
+
pseudo_box_aug: 1
|
| 14 |
+
pseudo_box_aug_num: 5
|
| 15 |
+
pseudo_box_aug_ratio: 0.3
|
| 16 |
+
refine_pseudo_box: 1
|
| 17 |
+
refine_pseudo_stage_num: 1
|
| 18 |
+
merge_k_boxes: 3
|
| 19 |
+
pseudo_box_type: similarity_op_order_v2
|
| 20 |
+
iteration: 3
|
| 21 |
+
width_th: 2
|
| 22 |
+
statistic_mode: mode
|
| 23 |
+
width_ratio: 1
|
| 24 |
+
window_size: 2
|
| 25 |
+
top_frames: 35
|
| 26 |
+
use_query_box_for_refine: 0
|
| 27 |
+
gt_proposal_sample_num: 12
|
| 28 |
+
|
| 29 |
+
use_anchor: 0
|
| 30 |
+
pretrained_language_model: UniVL
|
| 31 |
+
disable_contrastive_projection: 1
|
| 32 |
+
|
| 33 |
+
caption_decoder_type: standard
|
| 34 |
+
cap_nheads: 1
|
| 35 |
+
cap_dec_n_points: 4
|
| 36 |
+
cap_num_feature_levels: 4
|
| 37 |
+
soft_attention: 1
|
| 38 |
+
att_hid_size: 512
|
| 39 |
+
|
| 40 |
+
num_queries: 100
|
| 41 |
+
|
| 42 |
+
ec_alpha: 1.0
|
| 43 |
+
|
| 44 |
+
self_iou_loss_coef: 0.0
|
| 45 |
+
ref_rank_loss_coef: 0.0
|
| 46 |
+
contrastive_loss_start_coef: 0.0
|
backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_mixlm.yml
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: refine_aug(5,0.3)_top3_1stage
|
| 2 |
+
base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml
|
| 3 |
+
|
| 4 |
+
visual_feature_type: ['CLIP']
|
| 5 |
+
visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/']
|
| 6 |
+
text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/']
|
| 7 |
+
visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/']
|
| 8 |
+
text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/']
|
| 9 |
+
feature_dim: 768
|
| 10 |
+
hidden_dim: 512
|
| 11 |
+
|
| 12 |
+
use_pseudo_box: 1
|
| 13 |
+
pseudo_box_aug: 0
|
| 14 |
+
pseudo_box_aug_num: 5
|
| 15 |
+
pseudo_box_aug_ratio: 0.3
|
| 16 |
+
refine_pseudo_box: 0
|
| 17 |
+
refine_pseudo_stage_num: 1
|
| 18 |
+
merge_k_boxes: 3
|
| 19 |
+
pseudo_box_type: similarity_op_order_v2
|
| 20 |
+
iteration: 3
|
| 21 |
+
width_th: 2
|
| 22 |
+
use_query_box_for_refine: 0
|
| 23 |
+
gt_proposal_sample_num: 12
|
| 24 |
+
|
| 25 |
+
use_anchor: 1
|
| 26 |
+
pretrained_language_model: CLIP
|
| 27 |
+
disable_contrastive_projection: 1
|
| 28 |
+
|
| 29 |
+
caption_decoder_type: standard
|
| 30 |
+
cap_nheads: 1
|
| 31 |
+
cap_dec_n_points: 4
|
| 32 |
+
cap_num_feature_levels: 4
|
| 33 |
+
soft_attention: 1
|
| 34 |
+
att_hid_size: 512
|
| 35 |
+
|
| 36 |
+
num_queries: 100
|
| 37 |
+
|
| 38 |
+
ec_alpha: 1.0
|
| 39 |
+
|
| 40 |
+
self_iou_loss_coef: 0.0
|
| 41 |
+
ref_rank_loss_coef: 0.0
|
| 42 |
+
contrastive_loss_start_coef: 0.0
|
backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine_mixlm.yml
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: refine_aug(5,0.3)_top3_1stage
|
| 2 |
+
base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml
|
| 3 |
+
|
| 4 |
+
visual_feature_type: ['CLIP']
|
| 5 |
+
visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/']
|
| 6 |
+
text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/']
|
| 7 |
+
visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/']
|
| 8 |
+
text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/']
|
| 9 |
+
feature_dim: 768
|
| 10 |
+
hidden_dim: 512
|
| 11 |
+
|
| 12 |
+
use_pseudo_box: 1
|
| 13 |
+
pseudo_box_aug: 1
|
| 14 |
+
pseudo_box_aug_num: 5
|
| 15 |
+
pseudo_box_aug_ratio: 0.3
|
| 16 |
+
refine_pseudo_box: 1
|
| 17 |
+
refine_pseudo_stage_num: 1
|
| 18 |
+
merge_k_boxes: 3
|
| 19 |
+
pseudo_box_type: similarity_op_order_v2
|
| 20 |
+
iteration: 3
|
| 21 |
+
width_th: 2
|
| 22 |
+
statistic_mode: mode
|
| 23 |
+
width_ratio: 1
|
| 24 |
+
window_size: 3
|
| 25 |
+
top_frames: 10
|
| 26 |
+
use_query_box_for_refine: 0
|
| 27 |
+
gt_proposal_sample_num: 12
|
| 28 |
+
|
| 29 |
+
use_anchor: 1
|
| 30 |
+
pretrained_language_model: CLIP
|
| 31 |
+
disable_contrastive_projection: 1
|
| 32 |
+
|
| 33 |
+
caption_decoder_type: standard
|
| 34 |
+
cap_nheads: 1
|
| 35 |
+
cap_dec_n_points: 4
|
| 36 |
+
cap_num_feature_levels: 4
|
| 37 |
+
soft_attention: 1
|
| 38 |
+
att_hid_size: 512
|
| 39 |
+
|
| 40 |
+
num_queries: 100
|
| 41 |
+
|
| 42 |
+
ec_alpha: 1.0
|
| 43 |
+
|
| 44 |
+
self_iou_loss_coef: 0.0
|
| 45 |
+
ref_rank_loss_coef: 0.0
|
| 46 |
+
contrastive_loss_start_coef: 0.0
|
backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_mixlm.yml
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: refine_aug(5,0.3)_top3_1stage
|
| 2 |
+
base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml
|
| 3 |
+
|
| 4 |
+
visual_feature_type: ['UniVL']
|
| 5 |
+
visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/']
|
| 6 |
+
text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/']
|
| 7 |
+
visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/']
|
| 8 |
+
text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/']
|
| 9 |
+
feature_dim: 768
|
| 10 |
+
hidden_dim: 512
|
| 11 |
+
|
| 12 |
+
use_pseudo_box: 1
|
| 13 |
+
pseudo_box_aug: 0
|
| 14 |
+
pseudo_box_aug_num: 5
|
| 15 |
+
pseudo_box_aug_ratio: 0.3
|
| 16 |
+
refine_pseudo_box: 0
|
| 17 |
+
refine_pseudo_stage_num: 1
|
| 18 |
+
merge_k_boxes: 3
|
| 19 |
+
pseudo_box_type: similarity_op_order_v2
|
| 20 |
+
iteration: 3
|
| 21 |
+
width_th: 2
|
| 22 |
+
use_query_box_for_refine: 0
|
| 23 |
+
gt_proposal_sample_num: 12
|
| 24 |
+
|
| 25 |
+
use_anchor: 1
|
| 26 |
+
pretrained_language_model: UniVL
|
| 27 |
+
disable_contrastive_projection: 1
|
| 28 |
+
|
| 29 |
+
caption_decoder_type: standard
|
| 30 |
+
cap_nheads: 1
|
| 31 |
+
cap_dec_n_points: 4
|
| 32 |
+
cap_num_feature_levels: 4
|
| 33 |
+
soft_attention: 1
|
| 34 |
+
att_hid_size: 512
|
| 35 |
+
|
| 36 |
+
num_queries: 100
|
| 37 |
+
|
| 38 |
+
ec_alpha: 1.0
|
| 39 |
+
|
| 40 |
+
self_iou_loss_coef: 0.0
|
| 41 |
+
ref_rank_loss_coef: 0.0
|
| 42 |
+
contrastive_loss_start_coef: 0.0
|
backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine_mixlm.yml
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: refine_aug(5,0.3)_top3_1stage
|
| 2 |
+
base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml
|
| 3 |
+
|
| 4 |
+
visual_feature_type: ['UniVL']
|
| 5 |
+
visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/']
|
| 6 |
+
text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/']
|
| 7 |
+
visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/']
|
| 8 |
+
text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/']
|
| 9 |
+
feature_dim: 768
|
| 10 |
+
hidden_dim: 512
|
| 11 |
+
|
| 12 |
+
use_pseudo_box: 1
|
| 13 |
+
pseudo_box_aug: 1
|
| 14 |
+
pseudo_box_aug_num: 5
|
| 15 |
+
pseudo_box_aug_ratio: 0.3
|
| 16 |
+
refine_pseudo_box: 1
|
| 17 |
+
refine_pseudo_stage_num: 1
|
| 18 |
+
merge_k_boxes: 3
|
| 19 |
+
pseudo_box_type: similarity_op_order_v2
|
| 20 |
+
iteration: 3
|
| 21 |
+
width_th: 2
|
| 22 |
+
statistic_mode: mode
|
| 23 |
+
width_ratio: 1
|
| 24 |
+
window_size: 3
|
| 25 |
+
top_frames: 10
|
| 26 |
+
use_query_box_for_refine: 0
|
| 27 |
+
gt_proposal_sample_num: 12
|
| 28 |
+
|
| 29 |
+
use_anchor: 1
|
| 30 |
+
pretrained_language_model: UniVL
|
| 31 |
+
disable_contrastive_projection: 1
|
| 32 |
+
|
| 33 |
+
caption_decoder_type: standard
|
| 34 |
+
cap_nheads: 1
|
| 35 |
+
cap_dec_n_points: 4
|
| 36 |
+
cap_num_feature_levels: 4
|
| 37 |
+
soft_attention: 1
|
| 38 |
+
att_hid_size: 512
|
| 39 |
+
|
| 40 |
+
num_queries: 100
|
| 41 |
+
|
| 42 |
+
ec_alpha: 1.0
|
| 43 |
+
|
| 44 |
+
self_iou_loss_coef: 0.0
|
| 45 |
+
ref_rank_loss_coef: 0.0
|
| 46 |
+
contrastive_loss_start_coef: 0.0
|
backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_mixlm.yml
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: refine_aug(5,0.3)_top3_1stage
|
| 2 |
+
base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml
|
| 3 |
+
|
| 4 |
+
visual_feature_type: ['CLIP']
|
| 5 |
+
visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/']
|
| 6 |
+
text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/']
|
| 7 |
+
visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/']
|
| 8 |
+
text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/']
|
| 9 |
+
feature_dim: 768
|
| 10 |
+
hidden_dim: 512
|
| 11 |
+
|
| 12 |
+
use_pseudo_box: 1
|
| 13 |
+
pseudo_box_aug: 0
|
| 14 |
+
pseudo_box_aug_num: 5
|
| 15 |
+
pseudo_box_aug_ratio: 0.3
|
| 16 |
+
refine_pseudo_box: 0
|
| 17 |
+
refine_pseudo_stage_num: 1
|
| 18 |
+
merge_k_boxes: 3
|
| 19 |
+
pseudo_box_type: similarity_op_order_v2
|
| 20 |
+
iteration: 3
|
| 21 |
+
width_th: 2
|
| 22 |
+
use_query_box_for_refine: 0
|
| 23 |
+
gt_proposal_sample_num: 12
|
| 24 |
+
|
| 25 |
+
use_anchor: 0
|
| 26 |
+
pretrained_language_model: CLIP
|
| 27 |
+
disable_contrastive_projection: 1
|
| 28 |
+
|
| 29 |
+
caption_decoder_type: standard
|
| 30 |
+
cap_nheads: 1
|
| 31 |
+
cap_dec_n_points: 4
|
| 32 |
+
cap_num_feature_levels: 4
|
| 33 |
+
soft_attention: 1
|
| 34 |
+
att_hid_size: 512
|
| 35 |
+
|
| 36 |
+
num_queries: 100
|
| 37 |
+
|
| 38 |
+
ec_alpha: 1.0
|
| 39 |
+
|
| 40 |
+
self_iou_loss_coef: 0.0
|
| 41 |
+
ref_rank_loss_coef: 0.0
|
| 42 |
+
contrastive_loss_start_coef: 0.0
|
backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_mixlm.yml
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: refine_aug(5,0.3)_top3_1stage
|
| 2 |
+
base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml
|
| 3 |
+
|
| 4 |
+
visual_feature_type: ['CLIP']
|
| 5 |
+
visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/']
|
| 6 |
+
text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/']
|
| 7 |
+
visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/']
|
| 8 |
+
text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/']
|
| 9 |
+
feature_dim: 768
|
| 10 |
+
hidden_dim: 512
|
| 11 |
+
|
| 12 |
+
use_pseudo_box: 1
|
| 13 |
+
pseudo_box_aug: 1
|
| 14 |
+
pseudo_box_aug_num: 5
|
| 15 |
+
pseudo_box_aug_ratio: 0.3
|
| 16 |
+
refine_pseudo_box: 1
|
| 17 |
+
refine_pseudo_stage_num: 1
|
| 18 |
+
merge_k_boxes: 3
|
| 19 |
+
pseudo_box_type: similarity_op_order_v2
|
| 20 |
+
iteration: 3
|
| 21 |
+
width_th: 2
|
| 22 |
+
statistic_mode: mode
|
| 23 |
+
width_ratio: 1
|
| 24 |
+
window_size: 3
|
| 25 |
+
top_frames: 10
|
| 26 |
+
use_query_box_for_refine: 0
|
| 27 |
+
gt_proposal_sample_num: 12
|
| 28 |
+
|
| 29 |
+
use_anchor: 0
|
| 30 |
+
pretrained_language_model: CLIP
|
| 31 |
+
disable_contrastive_projection: 1
|
| 32 |
+
|
| 33 |
+
caption_decoder_type: standard
|
| 34 |
+
cap_nheads: 1
|
| 35 |
+
cap_dec_n_points: 4
|
| 36 |
+
cap_num_feature_levels: 4
|
| 37 |
+
soft_attention: 1
|
| 38 |
+
att_hid_size: 512
|
| 39 |
+
|
| 40 |
+
num_queries: 100
|
| 41 |
+
|
| 42 |
+
ec_alpha: 1.0
|
| 43 |
+
|
| 44 |
+
self_iou_loss_coef: 0.0
|
| 45 |
+
ref_rank_loss_coef: 0.0
|
| 46 |
+
contrastive_loss_start_coef: 0.0
|
backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_mixlm_v0.yml
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: refine_aug(5,0.3)_top3_1stage
|
| 2 |
+
base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml
|
| 3 |
+
|
| 4 |
+
visual_feature_type: ['CLIP']
|
| 5 |
+
visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/']
|
| 6 |
+
text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/']
|
| 7 |
+
visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/']
|
| 8 |
+
text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/']
|
| 9 |
+
feature_dim: 768
|
| 10 |
+
hidden_dim: 512
|
| 11 |
+
|
| 12 |
+
use_pseudo_box: 1
|
| 13 |
+
pseudo_box_aug: 1
|
| 14 |
+
pseudo_box_aug_num: 5
|
| 15 |
+
pseudo_box_aug_ratio: 0.3
|
| 16 |
+
refine_pseudo_box: 1
|
| 17 |
+
refine_pseudo_stage_num: 1
|
| 18 |
+
merge_k_boxes: 3
|
| 19 |
+
pseudo_box_type: similarity_op_order_v2
|
| 20 |
+
iteration: 3
|
| 21 |
+
width_th: 2
|
| 22 |
+
statistic_mode: mode
|
| 23 |
+
width_ratio: 1
|
| 24 |
+
window_size: 3
|
| 25 |
+
top_frames: 10
|
| 26 |
+
use_query_box_for_refine: 0
|
| 27 |
+
gt_proposal_sample_num: 12
|
| 28 |
+
|
| 29 |
+
use_anchor: 0
|
| 30 |
+
pretrained_language_model: CLIP
|
| 31 |
+
disable_contrastive_projection: 1
|
| 32 |
+
|
| 33 |
+
caption_decoder_type: standard
|
| 34 |
+
cap_nheads: 1
|
| 35 |
+
cap_dec_n_points: 4
|
| 36 |
+
cap_num_feature_levels: 4
|
| 37 |
+
soft_attention: 1
|
| 38 |
+
att_hid_size: 512
|
| 39 |
+
|
| 40 |
+
num_queries: 100
|
| 41 |
+
|
| 42 |
+
ec_alpha: 1.0
|
| 43 |
+
|
| 44 |
+
self_iou_loss_coef: 0.0
|
| 45 |
+
ref_rank_loss_coef: 0.0
|
| 46 |
+
contrastive_loss_start_coef: 0.0
|
backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_mixlm.yml
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: refine_aug(5,0.3)_top3_1stage
|
| 2 |
+
base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml
|
| 3 |
+
|
| 4 |
+
visual_feature_type: ['UniVL']
|
| 5 |
+
visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/']
|
| 6 |
+
text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/']
|
| 7 |
+
visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/']
|
| 8 |
+
text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/']
|
| 9 |
+
feature_dim: 768
|
| 10 |
+
hidden_dim: 512
|
| 11 |
+
|
| 12 |
+
use_pseudo_box: 1
|
| 13 |
+
pseudo_box_aug: 0
|
| 14 |
+
pseudo_box_aug_num: 5
|
| 15 |
+
pseudo_box_aug_ratio: 0.3
|
| 16 |
+
refine_pseudo_box: 0
|
| 17 |
+
refine_pseudo_stage_num: 1
|
| 18 |
+
merge_k_boxes: 3
|
| 19 |
+
pseudo_box_type: similarity_op_order_v2
|
| 20 |
+
iteration: 3
|
| 21 |
+
width_th: 2
|
| 22 |
+
use_query_box_for_refine: 0
|
| 23 |
+
gt_proposal_sample_num: 12
|
| 24 |
+
|
| 25 |
+
use_anchor: 0
|
| 26 |
+
pretrained_language_model: UniVL
|
| 27 |
+
disable_contrastive_projection: 1
|
| 28 |
+
|
| 29 |
+
caption_decoder_type: standard
|
| 30 |
+
cap_nheads: 1
|
| 31 |
+
cap_dec_n_points: 4
|
| 32 |
+
cap_num_feature_levels: 4
|
| 33 |
+
soft_attention: 1
|
| 34 |
+
att_hid_size: 512
|
| 35 |
+
|
| 36 |
+
num_queries: 100
|
| 37 |
+
|
| 38 |
+
ec_alpha: 1.0
|
| 39 |
+
|
| 40 |
+
self_iou_loss_coef: 0.0
|
| 41 |
+
ref_rank_loss_coef: 0.0
|
| 42 |
+
contrastive_loss_start_coef: 0.0
|
backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_puyu.yml
CHANGED
|
@@ -3,7 +3,7 @@ base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml
|
|
| 3 |
|
| 4 |
visual_feature_type: ['UniVL']
|
| 5 |
visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/']
|
| 6 |
-
text_feature_folder: ['/
|
| 7 |
visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/']
|
| 8 |
text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/']
|
| 9 |
feature_dim: 768
|
|
|
|
| 3 |
|
| 4 |
visual_feature_type: ['UniVL']
|
| 5 |
visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/']
|
| 6 |
+
text_feature_folder: ['/mnt/data/pjlab-3090-sport/wuhao/features/howto100m/univl_features/text_puyu', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/']
|
| 7 |
visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/']
|
| 8 |
text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/']
|
| 9 |
feature_dim: 768
|
backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_mixlm.yml
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: refine_aug(5,0.3)_top3_1stage
|
| 2 |
+
base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml
|
| 3 |
+
|
| 4 |
+
visual_feature_type: ['UniVL']
|
| 5 |
+
visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/']
|
| 6 |
+
text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/']
|
| 7 |
+
visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/']
|
| 8 |
+
text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/']
|
| 9 |
+
feature_dim: 768
|
| 10 |
+
hidden_dim: 512
|
| 11 |
+
|
| 12 |
+
use_pseudo_box: 1
|
| 13 |
+
pseudo_box_aug: 1
|
| 14 |
+
pseudo_box_aug_num: 5
|
| 15 |
+
pseudo_box_aug_ratio: 0.3
|
| 16 |
+
refine_pseudo_box: 1
|
| 17 |
+
refine_pseudo_stage_num: 2
|
| 18 |
+
merge_k_boxes: 3
|
| 19 |
+
pseudo_box_type: similarity_op_order_v2
|
| 20 |
+
iteration: 3
|
| 21 |
+
width_th: 2
|
| 22 |
+
statistic_mode: mode
|
| 23 |
+
width_ratio: 1
|
| 24 |
+
window_size: 3
|
| 25 |
+
top_frames: 10
|
| 26 |
+
use_query_box_for_refine: 0
|
| 27 |
+
gt_proposal_sample_num: 30
|
| 28 |
+
|
| 29 |
+
use_anchor: 0
|
| 30 |
+
pretrained_language_model: UniVL
|
| 31 |
+
disable_contrastive_projection: 1
|
| 32 |
+
|
| 33 |
+
caption_decoder_type: standard
|
| 34 |
+
cap_nheads: 1
|
| 35 |
+
cap_dec_n_points: 4
|
| 36 |
+
cap_num_feature_levels: 4
|
| 37 |
+
soft_attention: 1
|
| 38 |
+
att_hid_size: 512
|
| 39 |
+
|
| 40 |
+
num_queries: 100
|
| 41 |
+
|
| 42 |
+
ec_alpha: 1.0
|
| 43 |
+
|
| 44 |
+
self_iou_loss_coef: 0.0
|
| 45 |
+
ref_rank_loss_coef: 0.0
|
| 46 |
+
contrastive_loss_start_coef: 0.0
|
backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml
CHANGED
|
@@ -3,7 +3,7 @@ base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml
|
|
| 3 |
|
| 4 |
visual_feature_type: ['UniVL']
|
| 5 |
visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/']
|
| 6 |
-
text_feature_folder: ['/
|
| 7 |
visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/']
|
| 8 |
text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/']
|
| 9 |
feature_dim: 768
|
|
|
|
| 3 |
|
| 4 |
visual_feature_type: ['UniVL']
|
| 5 |
visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/']
|
| 6 |
+
text_feature_folder: ['/mnt/data/pjlab-3090-sport/wuhao/features/howto100m/univl_features/text_puyu', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/']
|
| 7 |
visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/']
|
| 8 |
text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/']
|
| 9 |
feature_dim: 768
|
backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
id: refine_aug(5,0.3)_top3_1stage
|
| 2 |
-
base_cfg_path: cfgs_base/howto/
|
| 3 |
|
| 4 |
visual_feature_type: ['UniVL']
|
| 5 |
visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual']
|
|
|
|
| 1 |
id: refine_aug(5,0.3)_top3_1stage
|
| 2 |
+
base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml
|
| 3 |
|
| 4 |
visual_feature_type: ['UniVL']
|
| 5 |
visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual']
|
backup/cfgs_ft_gt/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: ''
|
| 2 |
+
base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
pseudo_box_aug_num: 8
|
| 6 |
+
pseudo_box_aug_ratio: 0.02
|
| 7 |
+
pseudo_box_aug_mode: random_range
|
| 8 |
+
refine_pseudo_box: 1
|
| 9 |
+
refine_pseudo_stage_num: 2
|
| 10 |
+
merge_k_boxes: 3
|
| 11 |
+
pseudo_box_type: similarity_op_order_v2
|
| 12 |
+
top_frames: 30
|
| 13 |
+
width_ratio: 1
|
| 14 |
+
iteration: 3
|
| 15 |
+
width_th: 1
|
| 16 |
+
use_query_box_for_refine: 0
|
| 17 |
+
gt_proposal_sample_num: 20
|
| 18 |
+
mil_loss_coef: 0
|
| 19 |
+
merge_criterion: ins_cap_topk
|
backup/cfgs_ft_gt/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_rand2.yml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: ''
|
| 2 |
+
base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
pseudo_box_aug_num: 8
|
| 6 |
+
pseudo_box_aug_ratio: 0.02
|
| 7 |
+
pseudo_box_aug_mode: random_range
|
| 8 |
+
refine_pseudo_box: 1
|
| 9 |
+
refine_pseudo_stage_num: 2
|
| 10 |
+
merge_k_boxes: 3
|
| 11 |
+
pseudo_box_type: similarity_op_order_v2
|
| 12 |
+
top_frames: 30
|
| 13 |
+
width_ratio: 1
|
| 14 |
+
iteration: 3
|
| 15 |
+
width_th: 1
|
| 16 |
+
use_query_box_for_refine: 0
|
| 17 |
+
gt_proposal_sample_num: 20
|
| 18 |
+
mil_loss_coef: 0
|
| 19 |
+
merge_criterion: ins_cap_topk
|
backup/cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: ''
|
| 2 |
+
base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
pseudo_box_aug_num: 8
|
| 6 |
+
pseudo_box_aug_ratio: 0.02
|
| 7 |
+
pseudo_box_aug_mode: random_range
|
| 8 |
+
refine_pseudo_box: 1
|
| 9 |
+
refine_pseudo_stage_num: 2
|
| 10 |
+
merge_k_boxes: 3
|
| 11 |
+
pseudo_box_type: similarity_op_order_v2
|
| 12 |
+
top_frames: 25
|
| 13 |
+
width_ratio: 1
|
| 14 |
+
iteration: 3
|
| 15 |
+
width_th: 1
|
| 16 |
+
use_query_box_for_refine: 0
|
| 17 |
+
gt_proposal_sample_num: 20
|
| 18 |
+
mil_loss_coef: 0
|
| 19 |
+
merge_criterion: ins_cap_topk
|
backup/cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_rand2.yml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id: ''
|
| 2 |
+
base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
pseudo_box_aug_num: 8
|
| 6 |
+
pseudo_box_aug_ratio: 0.02
|
| 7 |
+
pseudo_box_aug_mode: random_range
|
| 8 |
+
refine_pseudo_box: 1
|
| 9 |
+
refine_pseudo_stage_num: 2
|
| 10 |
+
merge_k_boxes: 3
|
| 11 |
+
pseudo_box_type: similarity_op_order_v2
|
| 12 |
+
top_frames: 25
|
| 13 |
+
width_ratio: 1
|
| 14 |
+
iteration: 3
|
| 15 |
+
width_th: 1
|
| 16 |
+
use_query_box_for_refine: 0
|
| 17 |
+
gt_proposal_sample_num: 20
|
| 18 |
+
mil_loss_coef: 0
|
| 19 |
+
merge_criterion: ins_cap_topk
|
backup/change_config_add.py
CHANGED
|
@@ -12,10 +12,12 @@ args = parser.parse_args()
|
|
| 12 |
|
| 13 |
|
| 14 |
# Define the folder containing YAML files
|
| 15 |
-
folder_path = 'cfgs_ref'
|
| 16 |
# folder_path = 'cfgs_base/anet'
|
| 17 |
# folder_path = 'cfgs'
|
| 18 |
-
|
|
|
|
|
|
|
| 19 |
|
| 20 |
|
| 21 |
|
|
@@ -24,18 +26,18 @@ file_filter = 'yc2'
|
|
| 24 |
# find_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj'
|
| 25 |
# find_string = 'data/yc2/captiondata/yc2'
|
| 26 |
# find_string = "/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text"
|
| 27 |
-
find_string = "
|
| 28 |
# find_string = "pdvc_mode: 0"
|
| 29 |
|
| 30 |
# replace_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'
|
| 31 |
# replace_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'
|
| 32 |
# replace_string = 'data/tasty/captiondata/tasty'
|
| 33 |
# replace_string = "cfgs_base/tasty/tasty_tsn_pdvcl.yml"
|
| 34 |
-
replace_string = "
|
| 35 |
# replace_string = "pdvc_mode: 1"
|
| 36 |
|
| 37 |
-
old_name = '
|
| 38 |
-
new_name = '
|
| 39 |
|
| 40 |
def replace_yaml(yaml_file_path, new_file_path, old_string, new_string):
|
| 41 |
# Read the YAML file as text
|
|
|
|
| 12 |
|
| 13 |
|
| 14 |
# Define the folder containing YAML files
|
| 15 |
+
# folder_path = 'cfgs_ref'
|
| 16 |
# folder_path = 'cfgs_base/anet'
|
| 17 |
# folder_path = 'cfgs'
|
| 18 |
+
folder_path = 'cfgs_yc2_ft_perc'
|
| 19 |
+
|
| 20 |
+
file_filter = ''
|
| 21 |
|
| 22 |
|
| 23 |
|
|
|
|
| 26 |
# find_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj'
|
| 27 |
# find_string = 'data/yc2/captiondata/yc2'
|
| 28 |
# find_string = "/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text"
|
| 29 |
+
find_string = "ft_gt_percent: 0.25"
|
| 30 |
# find_string = "pdvc_mode: 0"
|
| 31 |
|
| 32 |
# replace_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual'
|
| 33 |
# replace_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text'
|
| 34 |
# replace_string = 'data/tasty/captiondata/tasty'
|
| 35 |
# replace_string = "cfgs_base/tasty/tasty_tsn_pdvcl.yml"
|
| 36 |
+
replace_string = "ft_gt_percent: 0.75"
|
| 37 |
# replace_string = "pdvc_mode: 1"
|
| 38 |
|
| 39 |
+
old_name = 'perc0.25'
|
| 40 |
+
new_name = 'perc0.75'
|
| 41 |
|
| 42 |
def replace_yaml(yaml_file_path, new_file_path, old_string, new_string):
|
| 43 |
# Read the YAML file as text
|
backup/misc/__pycache__/utils.cpython-38.pyc
CHANGED
|
Binary files a/backup/misc/__pycache__/utils.cpython-38.pyc and b/backup/misc/__pycache__/utils.cpython-38.pyc differ
|
|
|
backup/misc/utils.py
CHANGED
|
@@ -241,7 +241,7 @@ def build_folder(opt):
|
|
| 241 |
save_foldername += '_C-layer'
|
| 242 |
if 'puyu' in opt.train_caption_file[0]:
|
| 243 |
save_foldername += '_puyu'
|
| 244 |
-
elif '
|
| 245 |
save_foldername += '_mixlm'
|
| 246 |
|
| 247 |
if opt.id != '':
|
|
@@ -281,8 +281,13 @@ def build_folder(opt):
|
|
| 281 |
return save_folder
|
| 282 |
|
| 283 |
|
| 284 |
-
def backup_envir(save_folder):
|
|
|
|
|
|
|
| 285 |
backup_folders = ['cfgs_base', 'cfgs', 'misc', 'pdvc']
|
|
|
|
|
|
|
|
|
|
| 286 |
backup_files = glob.glob('./*.py')
|
| 287 |
for folder in backup_folders:
|
| 288 |
shutil.copytree(folder, os.path.join(save_folder, 'backup', folder))
|
|
|
|
| 241 |
save_foldername += '_C-layer'
|
| 242 |
if 'puyu' in opt.train_caption_file[0]:
|
| 243 |
save_foldername += '_puyu'
|
| 244 |
+
elif 'mixlm' in opt.train_caption_file[0]:
|
| 245 |
save_foldername += '_mixlm'
|
| 246 |
|
| 247 |
if opt.id != '':
|
|
|
|
| 281 |
return save_folder
|
| 282 |
|
| 283 |
|
| 284 |
+
def backup_envir(save_folder, opt):
|
| 285 |
+
cfg_path = opt.cfg_path
|
| 286 |
+
dir_path = os.path.dirname(cfg_path)
|
| 287 |
backup_folders = ['cfgs_base', 'cfgs', 'misc', 'pdvc']
|
| 288 |
+
if dir_path not in backup_folders:
|
| 289 |
+
backup_folders.append(dir_path)
|
| 290 |
+
|
| 291 |
backup_files = glob.glob('./*.py')
|
| 292 |
for folder in backup_folders:
|
| 293 |
shutil.copytree(folder, os.path.join(save_folder, 'backup', folder))
|
backup/opts.py
CHANGED
|
@@ -269,6 +269,7 @@ def parse_opts():
|
|
| 269 |
|
| 270 |
# reranking
|
| 271 |
parser.add_argument('--ec_alpha', type=float, default=0.3)
|
|
|
|
| 272 |
args = parser.parse_args()
|
| 273 |
|
| 274 |
if args.cfg_path:
|
|
|
|
| 269 |
|
| 270 |
# reranking
|
| 271 |
parser.add_argument('--ec_alpha', type=float, default=0.3)
|
| 272 |
+
parser.add_argument('--test', action='store_true', default=False)
|
| 273 |
args = parser.parse_args()
|
| 274 |
|
| 275 |
if args.cfg_path:
|
backup/pdvc/__pycache__/pdvc.cpython-38.pyc
CHANGED
|
Binary files a/backup/pdvc/__pycache__/pdvc.cpython-38.pyc and b/backup/pdvc/__pycache__/pdvc.cpython-38.pyc differ
|
|
|
backup/pdvc/__pycache__/video_segmentation.cpython-38.pyc
CHANGED
|
Binary files a/backup/pdvc/__pycache__/video_segmentation.cpython-38.pyc and b/backup/pdvc/__pycache__/video_segmentation.cpython-38.pyc differ
|
|
|
backup/pdvc/pdvc.py
CHANGED
|
@@ -316,6 +316,8 @@ class PDVC(nn.Module):
|
|
| 316 |
video_step_alignment = [align_frame_into_steps_op_v1(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), topk=self.opt.top_frames, scale=self.opt.width_ratio, beta=1, order=True, num_iterations=self.opt.iteration) for i in range(N)]
|
| 317 |
elif self.opt.pseudo_box_type == 'similarity_op_order_v2':
|
| 318 |
video_step_alignment = [align_frame_into_steps_op_order_v2(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), topk=self.opt.top_frames, threshold=self.opt.width_th, ratio=self.opt.width_ratio, iteration=self.opt.iteration) for i in range(N)]
|
|
|
|
|
|
|
| 319 |
elif self.opt.pseudo_box_type == 'weight_sim':
|
| 320 |
if self.opt.width_ratio < 0:
|
| 321 |
video_step_alignment = [step_retrieval_weight_sim(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \
|
|
|
|
| 316 |
video_step_alignment = [align_frame_into_steps_op_v1(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), topk=self.opt.top_frames, scale=self.opt.width_ratio, beta=1, order=True, num_iterations=self.opt.iteration) for i in range(N)]
|
| 317 |
elif self.opt.pseudo_box_type == 'similarity_op_order_v2':
|
| 318 |
video_step_alignment = [align_frame_into_steps_op_order_v2(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), topk=self.opt.top_frames, threshold=self.opt.width_th, ratio=self.opt.width_ratio, iteration=self.opt.iteration) for i in range(N)]
|
| 319 |
+
elif self.opt.pseudo_box_type == 'similarity_op_v2':
|
| 320 |
+
video_step_alignment = [align_frame_into_steps_op_v2(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), topk=self.opt.top_frames, threshold=self.opt.width_th, ratio=self.opt.width_ratio, iteration=self.opt.iteration) for i in range(N)]
|
| 321 |
elif self.opt.pseudo_box_type == 'weight_sim':
|
| 322 |
if self.opt.width_ratio < 0:
|
| 323 |
video_step_alignment = [step_retrieval_weight_sim(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \
|
backup/pdvc/video_segmentation.py
CHANGED
|
@@ -632,6 +632,65 @@ def align_frame_into_steps_op_order_v2(frame_features, step_features, topk=15, t
|
|
| 632 |
|
| 633 |
return (best_bbox, min_loss)
|
| 634 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 635 |
|
| 636 |
|
| 637 |
# pesudo box 4: based on fixed window. the result is bad. give up
|
|
|
|
| 632 |
|
| 633 |
return (best_bbox, min_loss)
|
| 634 |
|
| 635 |
+
def align_frame_into_steps_op_v2(frame_features, step_features, topk=15, threshold=0.5, ratio=1, iteration=3):
    """Build one pseudo temporal box per step from its top-k most similar frames.

    For every row of the step/frame similarity matrix, the ``topk`` highest
    scoring frame indices are selected from the whole video, filtered with
    ``compute_filtered_indices`` and the surviving indices are turned into a
    ``[min_index, max_index]`` box. A step whose filtered set is empty falls
    back to its entry in the uniform partition returned by ``uniform_window``.

    Args:
        frame_features: Frame features; ``shape[0]`` is used as the number of
            frames. Exact layout is whatever ``compute_sim`` expects.
        step_features: Step (text) features; ``shape[0]`` is used as the
            number of steps.
        topk: Number of best-matching frames kept per step.
        threshold: Forwarded unchanged to ``compute_filtered_indices``.
        ratio: Unused by this variant; kept so the signature stays parallel
            to the other ``align_frame_into_steps_*`` functions.
        iteration: Number of passes. Must be >= 1, otherwise the final
            ``min`` over an empty dict raises ``ValueError``.

    Returns:
        ``(best_bbox, min_loss)`` where ``best_bbox`` is a list with one box
        per step and ``min_loss`` is the summed ``compute_bbox_loss`` of the
        selected pass.

        NOTE(review): when ``step_features`` is empty the function instead
        returns a 1-D array of ``-1`` with one entry per frame (not a tuple).
        Callers presumably rely on this; confirm before unifying.
    """
    if step_features.shape[0] == 0:
        return -np.ones(frame_features.shape[0])

    sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu()
    # Negate so that argsort yields frame indices in descending similarity.
    sorted_index = torch.argsort(-sim, dim=1)
    num_rows = sim.shape[0]
    top_indices_list_global = [sorted_index[i][:topk] for i in range(num_rows)]
    top_values_list_global = [sim[i][top_indices_list_global[i]] for i in range(num_rows)]

    # Fallback boxes for steps that end up without any usable frame.
    uniform_boxes = uniform_window(frame_features.shape[0], step_features.shape[0])

    # NOTE(review): nothing computed in one pass feeds the next one, so every
    # pass appears to repeat the same computation (unless the helpers are
    # stateful). The loop is kept to preserve existing behaviour.
    iter_bbox_loss = {}
    for pass_idx in range(iteration):
        bbox = []
        for i, (top_indices, top_values) in enumerate(zip(top_indices_list_global, top_values_list_global)):
            filtered_indices = compute_filtered_indices(top_indices.tolist(), top_values.tolist(), threshold)
            if len(filtered_indices) == 0:
                # NOTE(review): the second argument is the index list, not the
                # similarity values. This mirrors the original code; it may be
                # a copy-paste slip -- confirm before changing.
                filtered_indices = compute_filtered_indices(top_indices.tolist(), top_indices.tolist(), threshold)
            if len(filtered_indices) == 0:
                bbox.append(uniform_boxes[i])
                continue
            bbox.append([min(filtered_indices), max(filtered_indices)])

        # Score this pass: total box loss over all steps.
        bbox_loss_list = [
            compute_bbox_loss(top_indices, box, top_values)
            for top_indices, box, top_values in zip(top_indices_list_global, bbox, top_values_list_global)
        ]
        bbox_loss = sum(bbox_loss_list)
        iter_bbox_loss[pass_idx] = {'loss': bbox_loss, 'bbox': bbox}

    # Keep the pass with the smallest total loss (first one wins on ties).
    min_loss_iter = min(iter_bbox_loss.keys(), key=lambda k: iter_bbox_loss[k]['loss'])
    min_loss = iter_bbox_loss[min_loss_iter]['loss']
    best_bbox = iter_bbox_loss[min_loss_iter]['bbox']

    return (best_bbox, min_loss)
|
| 693 |
+
|
| 694 |
|
| 695 |
|
| 696 |
# pesudo box 4: based on fixed window. the result is bad. give up
|
backup/train.py
CHANGED
|
@@ -48,8 +48,8 @@ def construct_save_path(opt, save_folder="/mnt/data/pjlab-3090-sport/wuhao/code/
|
|
| 48 |
if len(opt.train_caption_file) == 2:
|
| 49 |
if 'puyu' in opt.train_caption_file[0]:
|
| 50 |
elements.append('howto_puyu')
|
| 51 |
-
elif '
|
| 52 |
-
elements.append('
|
| 53 |
else:
|
| 54 |
elements.append('howto_llama2')
|
| 55 |
elements.append('howto')
|
|
@@ -65,8 +65,8 @@ def construct_save_path(opt, save_folder="/mnt/data/pjlab-3090-sport/wuhao/code/
|
|
| 65 |
elif 'howto' in opt.train_caption_file:
|
| 66 |
if 'puyu' in opt.train_caption_file:
|
| 67 |
elements.append('howto_puyu')
|
| 68 |
-
elif '
|
| 69 |
-
elements.append('
|
| 70 |
else:
|
| 71 |
elements.append('howto_llama2')
|
| 72 |
# elements.append('howto')
|
|
@@ -114,8 +114,97 @@ def train(opt):
|
|
| 114 |
logger = create_logger(save_folder, 'train.log')
|
| 115 |
tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary'))
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
if not opt.start_from:
|
| 118 |
-
backup_envir(save_folder)
|
| 119 |
logger.info('backup evironment completed !')
|
| 120 |
|
| 121 |
saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}}
|
|
@@ -136,6 +225,8 @@ def train(opt):
|
|
| 136 |
if prev_opt.get(opt_name) != vars(opt).get(opt_name):
|
| 137 |
logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name),
|
| 138 |
vars(opt).get(opt_name)))
|
|
|
|
|
|
|
| 139 |
if len(opt.visual_feature_folder) == 2:
|
| 140 |
train_dataset_1 = PropSeqDataset(opt.train_caption_file[0],
|
| 141 |
[opt.visual_feature_folder[0]],
|
|
@@ -195,7 +286,6 @@ def train(opt):
|
|
| 195 |
model.train()
|
| 196 |
|
| 197 |
# try to load saved pbox
|
| 198 |
-
saved_path = construct_save_path(opt)
|
| 199 |
if os.path.exists(saved_path):
|
| 200 |
try:
|
| 201 |
with open(saved_path, 'r') as f:
|
|
@@ -322,10 +412,11 @@ def train(opt):
|
|
| 322 |
# if dt['video_key'][0] != 'LGArj9Do0xc':
|
| 323 |
# continue
|
| 324 |
# # for fast debugging
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
|
|
|
| 329 |
# if trained_samples < 1714:
|
| 330 |
# trained_samples += 1
|
| 331 |
# continue
|
|
@@ -486,7 +577,7 @@ def train(opt):
|
|
| 486 |
|
| 487 |
epoch += 1
|
| 488 |
|
| 489 |
-
if epoch == 1 and model.pseudo_boxes is not None and '
|
| 490 |
# save the pseudo boxes
|
| 491 |
pbox_save_path = construct_save_path(opt)
|
| 492 |
if not os.path.exists(pbox_save_path):
|
|
|
|
| 48 |
if len(opt.train_caption_file) == 2:
|
| 49 |
if 'puyu' in opt.train_caption_file[0]:
|
| 50 |
elements.append('howto_puyu')
|
| 51 |
+
elif 'mixlm' in opt.train_caption_file[0]:
|
| 52 |
+
elements.append('howto_mixlm')
|
| 53 |
else:
|
| 54 |
elements.append('howto_llama2')
|
| 55 |
elements.append('howto')
|
|
|
|
| 65 |
elif 'howto' in opt.train_caption_file:
|
| 66 |
if 'puyu' in opt.train_caption_file:
|
| 67 |
elements.append('howto_puyu')
|
| 68 |
+
elif 'mixlm' in opt.train_caption_file:
|
| 69 |
+
elements.append('howto_mixlm')
|
| 70 |
else:
|
| 71 |
elements.append('howto_llama2')
|
| 72 |
# elements.append('howto')
|
|
|
|
| 114 |
logger = create_logger(save_folder, 'train.log')
|
| 115 |
tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary'))
|
| 116 |
|
| 117 |
+
# if use mixlm model
|
| 118 |
+
saved_path = construct_save_path(opt)
|
| 119 |
+
|
| 120 |
+
if 'mixlm' in saved_path:
|
| 121 |
+
# text_feature_folder_mixlm = os.path.join(save_folder, 'text_feature')
|
| 122 |
+
mixlm_pbox_path = construct_save_path(opt, save_folder='test').replace('.json', '').replace('test/', '')
|
| 123 |
+
text_feature_folder_mixlm = os.path.join('/mnt/data/Gvlab/wuhao/code/tmp', 'mix_text_feature', mixlm_pbox_path)
|
| 124 |
+
os.makedirs(text_feature_folder_mixlm, exist_ok=True)
|
| 125 |
+
if 'clip' in save_folder or 'CLIP' in save_folder:
|
| 126 |
+
text_feature_folder_llama2 = map_path('/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj')
|
| 127 |
+
text_feature_folder_puyu = '/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip'
|
| 128 |
+
elif 'univl' in save_folder or 'UniVL' in save_folder or 'Uni' in save_folder:
|
| 129 |
+
text_feature_folder_llama2 = map_path('/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text')
|
| 130 |
+
text_feature_folder_puyu = '/mnt/data/pjlab-3090-sport/wuhao/features/howto100m/univl_features/text_puyu'
|
| 131 |
+
|
| 132 |
+
if not os.path.exists(saved_path):
|
| 133 |
+
llama2_pbox_path = saved_path.replace('mixlm', 'llama2')
|
| 134 |
+
puyu_pbox_path = saved_path.replace('mixlm', 'puyu')
|
| 135 |
+
with open(llama2_pbox_path, 'r') as f:
|
| 136 |
+
llama2_pbox = json.load(f)
|
| 137 |
+
with open(puyu_pbox_path, 'r') as f:
|
| 138 |
+
puyu_pbox = json.load(f)
|
| 139 |
+
|
| 140 |
+
mixlm_pbox = {}
|
| 141 |
+
for video_key in llama2_pbox.keys():
|
| 142 |
+
if llama2_pbox.get(video_key) is None and puyu_pbox.get(video_key) is None:
|
| 143 |
+
mixlm_pbox[video_key] = None
|
| 144 |
+
elif llama2_pbox.get(video_key) is None:
|
| 145 |
+
mixlm_pbox[video_key] = {'box': puyu_pbox[video_key]['box'], 'loss': puyu_pbox[video_key]['loss'], 'llm': 'puyu'}
|
| 146 |
+
elif puyu_pbox.get(video_key) is None:
|
| 147 |
+
mixlm_pbox[video_key] = {'box': llama2_pbox[video_key]['box'], 'loss': llama2_pbox[video_key]['loss'], 'llm': 'llama2'}
|
| 148 |
+
else:
|
| 149 |
+
if llama2_pbox[video_key]['loss'] < puyu_pbox[video_key]['loss']:
|
| 150 |
+
mixlm_pbox[video_key] = {'box': llama2_pbox[video_key]['box'], 'loss': llama2_pbox[video_key]['loss'], 'llm': 'llama2'}
|
| 151 |
+
else:
|
| 152 |
+
mixlm_pbox[video_key] = {'box': puyu_pbox[video_key]['box'], 'loss': puyu_pbox[video_key]['loss'], 'llm': 'puyu'}
|
| 153 |
+
with open(saved_path, 'w') as f:
|
| 154 |
+
json.dump(mixlm_pbox, f)
|
| 155 |
+
|
| 156 |
+
with open(saved_path, 'r') as f:
|
| 157 |
+
mixlm_pbox = json.load(f)
|
| 158 |
+
with open('data/howto/captiondata/howto100m_train_puyu.json', 'r') as f:
|
| 159 |
+
meta_puyu = json.load(f)
|
| 160 |
+
with open('data/howto/captiondata/howto100m_train.json', 'r') as f:
|
| 161 |
+
meta_llama2 = json.load(f)
|
| 162 |
+
|
| 163 |
+
meta_mixlm = {}
|
| 164 |
+
for video_key in mixlm_pbox.keys():
|
| 165 |
+
if mixlm_pbox.get(video_key) is not None and (meta_llama2.get(video_key) is not None or meta_puyu.get(video_key) is not None):
|
| 166 |
+
if mixlm_pbox[video_key]['llm'] == 'llama2':
|
| 167 |
+
meta_mixlm[video_key] = meta_llama2[video_key]
|
| 168 |
+
llama2_feature_path = os.path.join(text_feature_folder_llama2, video_key + '.npy')
|
| 169 |
+
if not os.path.exists(llama2_feature_path):
|
| 170 |
+
continue
|
| 171 |
+
# if os.path.exists(llama2_feature_path):
|
| 172 |
+
# os.unlink(llama2_feature_path)
|
| 173 |
+
# if not os.path.exists(llama2_feature_path):
|
| 174 |
+
# os.symlink(llama2_feature_path, os.path.join(text_feature_folder_mixlm, video_key + '.npy'))
|
| 175 |
+
soft_link_path = os.path.join(text_feature_folder_mixlm, video_key + '.npy')
|
| 176 |
+
# if os.path.exists(soft_link_path):
|
| 177 |
+
# os.unlink(soft_link_path)
|
| 178 |
+
if not os.path.exists(soft_link_path):
|
| 179 |
+
# print(os.path.exists(soft_link_path), os.path.exists(llama2_feature_path))
|
| 180 |
+
os.symlink(llama2_feature_path, soft_link_path)
|
| 181 |
+
# text_feature = np.load(llama2_feature_path)
|
| 182 |
+
# if text_feature.shape[0] != len(meta_llama2[video_key]['sentences']):
|
| 183 |
+
# print(f"{video_key} has {text_feature.shape[0]} sentences, but {len(meta_llama2[video_key]['sentences'])} sentences found in meta file")
|
| 184 |
+
else:
|
| 185 |
+
meta_mixlm[video_key] = meta_puyu[video_key]
|
| 186 |
+
puyu_feature_path = os.path.join(text_feature_folder_puyu, video_key + '.npy')
|
| 187 |
+
if not os.path.exists(puyu_feature_path):
|
| 188 |
+
continue
|
| 189 |
+
|
| 190 |
+
soft_link_path = os.path.join(text_feature_folder_mixlm, video_key + '.npy')
|
| 191 |
+
|
| 192 |
+
# if os.path.exists(soft_link_path):
|
| 193 |
+
# os.unlink(soft_link_path)
|
| 194 |
+
if not os.path.exists(soft_link_path):
|
| 195 |
+
os.symlink(puyu_feature_path, soft_link_path)
|
| 196 |
+
# text_feature = np.load(puyu_feature_path)
|
| 197 |
+
# if text_feature.shape[0] != len(meta_puyu[video_key]['sentences']):
|
| 198 |
+
# print(f"{video_key} has {text_feature.shape[0]} sentences, but {len(meta_puyu[video_key]['sentences'])} sentences found in meta file")
|
| 199 |
+
with open(os.path.join(save_folder, 'train_caption_mixlm.json'), 'w') as f:
|
| 200 |
+
json.dump(meta_mixlm, f)
|
| 201 |
+
opt.train_caption_file[0] = os.path.join(save_folder, 'train_caption_mixlm.json')
|
| 202 |
+
opt.text_feature_folder[0] = text_feature_folder_mixlm
|
| 203 |
+
# pass
|
| 204 |
+
|
| 205 |
+
|
| 206 |
if not opt.start_from:
|
| 207 |
+
backup_envir(save_folder, opt)
|
| 208 |
logger.info('backup evironment completed !')
|
| 209 |
|
| 210 |
saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}}
|
|
|
|
| 225 |
if prev_opt.get(opt_name) != vars(opt).get(opt_name):
|
| 226 |
logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name),
|
| 227 |
vars(opt).get(opt_name)))
|
| 228 |
+
print(opt.text_feature_folder)
|
| 229 |
+
print(opt.train_caption_file)
|
| 230 |
if len(opt.visual_feature_folder) == 2:
|
| 231 |
train_dataset_1 = PropSeqDataset(opt.train_caption_file[0],
|
| 232 |
[opt.visual_feature_folder[0]],
|
|
|
|
| 286 |
model.train()
|
| 287 |
|
| 288 |
# try to load saved pbox
|
|
|
|
| 289 |
if os.path.exists(saved_path):
|
| 290 |
try:
|
| 291 |
with open(saved_path, 'r') as f:
|
|
|
|
| 412 |
# if dt['video_key'][0] != 'LGArj9Do0xc':
|
| 413 |
# continue
|
| 414 |
# # for fast debugging
|
| 415 |
+
if opt.test:
|
| 416 |
+
if trained_samples > 5:
|
| 417 |
+
break
|
| 418 |
+
else:
|
| 419 |
+
trained_samples += 1
|
| 420 |
# if trained_samples < 1714:
|
| 421 |
# trained_samples += 1
|
| 422 |
# continue
|
|
|
|
| 577 |
|
| 578 |
epoch += 1
|
| 579 |
|
| 580 |
+
if epoch == 1 and model.pseudo_boxes is not None and 'mixlm' not in opt.train_caption_file[0]:
|
| 581 |
# save the pseudo boxes
|
| 582 |
pbox_save_path = construct_save_path(opt)
|
| 583 |
if not os.path.exists(pbox_save_path):
|
backup/train_fewshot.py
CHANGED
|
@@ -68,7 +68,7 @@ def train(opt):
|
|
| 68 |
tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary'))
|
| 69 |
|
| 70 |
if not opt.start_from:
|
| 71 |
-
backup_envir(save_folder)
|
| 72 |
logger.info('backup evironment completed !')
|
| 73 |
|
| 74 |
saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}}
|
|
|
|
| 68 |
tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary'))
|
| 69 |
|
| 70 |
if not opt.start_from:
|
| 71 |
+
backup_envir(save_folder, opt)
|
| 72 |
logger.info('backup evironment completed !')
|
| 73 |
|
| 74 |
saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}}
|
backup/train_ft2_gt.py
CHANGED
|
@@ -147,7 +147,7 @@ def train(opt):
|
|
| 147 |
tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary'))
|
| 148 |
|
| 149 |
if not opt.start_from:
|
| 150 |
-
backup_envir(save_folder)
|
| 151 |
logger.info('backup evironment completed !')
|
| 152 |
|
| 153 |
saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}}
|
|
@@ -190,16 +190,17 @@ def train(opt):
|
|
| 190 |
# train_dataset.translator = train_dataset_1.translator
|
| 191 |
|
| 192 |
else:
|
| 193 |
-
print('the script only support two dataset for pretrain and target task respectively')
|
| 194 |
-
exit(1)
|
| 195 |
train_dataset_target = PropSeqDataset(opt.train_caption_file,
|
| 196 |
opt.visual_feature_folder,
|
| 197 |
opt.text_feature_folder,
|
| 198 |
opt.dict_file, True, 'gt',
|
| 199 |
opt)
|
| 200 |
-
|
|
|
|
| 201 |
shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
|
| 202 |
-
train_dataloaders = [train_loader_target]
|
| 203 |
|
| 204 |
# val_dataset = PropSeqDataset(opt.val_caption_file,
|
| 205 |
# opt.visual_feature_folder,
|
|
|
|
| 147 |
tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary'))
|
| 148 |
|
| 149 |
if not opt.start_from:
|
| 150 |
+
backup_envir(save_folder, opt)
|
| 151 |
logger.info('backup evironment completed !')
|
| 152 |
|
| 153 |
saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}}
|
|
|
|
| 190 |
# train_dataset.translator = train_dataset_1.translator
|
| 191 |
|
| 192 |
else:
|
| 193 |
+
# print('the script only support two dataset for pretrain and target task respectively')
|
| 194 |
+
# exit(1)
|
| 195 |
train_dataset_target = PropSeqDataset(opt.train_caption_file,
|
| 196 |
opt.visual_feature_folder,
|
| 197 |
opt.text_feature_folder,
|
| 198 |
opt.dict_file, True, 'gt',
|
| 199 |
opt)
|
| 200 |
+
subset_data = PercentageSubsetDataset(train_dataset_target, opt.ft_gt_percent)
|
| 201 |
+
train_loader_target = DataLoader(subset_data, batch_size=opt.batch_size,
|
| 202 |
shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn)
|
| 203 |
+
# train_dataloaders = [train_loader_target]
|
| 204 |
|
| 205 |
# val_dataset = PropSeqDataset(opt.val_caption_file,
|
| 206 |
# opt.visual_feature_folder,
|
backup/train_pre_ft_gt.py
CHANGED
|
@@ -45,7 +45,7 @@ import copy
|
|
| 45 |
a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features']
|
| 46 |
r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m']
|
| 47 |
|
| 48 |
-
pretrain_data_mode = '
|
| 49 |
|
| 50 |
# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m
|
| 51 |
# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features
|
|
@@ -122,7 +122,7 @@ def train(opt):
|
|
| 122 |
tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary'))
|
| 123 |
|
| 124 |
if not opt.start_from:
|
| 125 |
-
backup_envir(save_folder)
|
| 126 |
logger.info('backup evironment completed !')
|
| 127 |
|
| 128 |
saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}}
|
|
|
|
| 45 |
a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features']
|
| 46 |
r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m']
|
| 47 |
|
| 48 |
+
pretrain_data_mode = 'mix' # 'mix' or 'seq' or 'single'
|
| 49 |
|
| 50 |
# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m
|
| 51 |
# /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features
|
|
|
|
| 122 |
tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary'))
|
| 123 |
|
| 124 |
if not opt.start_from:
|
| 125 |
+
backup_envir(save_folder, opt)
|
| 126 |
logger.info('backup evironment completed !')
|
| 127 |
|
| 128 |
saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}}
|