namithap committed on
Commit
d37e83c
·
1 Parent(s): 57309fe

initial commit

Browse files
Files changed (45) hide show
  1. .gitattributes +5 -0
  2. LICENSE.txt +3 -0
  3. README.md +3 -0
  4. nervenc/480p_finetuned_baseline/cfg.yaml +103 -0
  5. nervenc/480p_finetuned_baseline/epoch-last.pth +3 -0
  6. nervenc/480p_finetuned_baseline_small/cfg.yaml +103 -0
  7. nervenc/480p_finetuned_baseline_small/epoch-last.pth +3 -0
  8. nervenc/720p_finetuned_baseline/cfg.yaml +104 -0
  9. nervenc/720p_finetuned_baseline/epoch-last.pth +3 -0
  10. nervenc/pre_finetune/pre_finetune_480p_baseline/cfg.yaml +101 -0
  11. nervenc/pre_finetune/pre_finetune_480p_baseline/epoch-last.pth +3 -0
  12. nervenc/pre_finetune/pre_finetune_480p_baseline_small/cfg.yaml +101 -0
  13. nervenc/pre_finetune/pre_finetune_480p_baseline_small/epoch-last.pth +3 -0
  14. nervenc/pre_finetune/pre_finetune_720p_baseline/cfg.yaml +101 -0
  15. nervenc/pre_finetune/pre_finetune_720p_baseline/epoch-last.pth +3 -0
  16. patch_tubelet/320x160_finetuned_patch/cfg.yaml +112 -0
  17. patch_tubelet/320x160_finetuned_patch/epoch-last.pth +3 -0
  18. patch_tubelet/320x160_finetuned_patch_small/cfg.yaml +112 -0
  19. patch_tubelet/320x160_finetuned_patch_small/epoch-last.pth +3 -0
  20. patch_tubelet/320x240_finetuned_patch_train_720p/cfg.yaml +112 -0
  21. patch_tubelet/320x240_finetuned_patch_train_720p/epoch-last.pth +3 -0
  22. patch_tubelet/pre_finetune/pre_finetune_320x160_patch/cfg.yaml +110 -0
  23. patch_tubelet/pre_finetune/pre_finetune_320x160_patch/epoch-last.pth +3 -0
  24. patch_tubelet/pre_finetune/pre_finetune_320x160_patch_small/cfg.yaml +110 -0
  25. patch_tubelet/pre_finetune/pre_finetune_320x160_patch_small/epoch-last.pth +3 -0
  26. patch_tubelet/pre_finetune/pre_finetune_320x240_patch_train_480p/cfg.yaml +110 -0
  27. patch_tubelet/pre_finetune/pre_finetune_320x240_patch_train_480p/epoch-last.pth +3 -0
  28. patch_tubelet/pre_finetune/pre_finetune_320x240_patch_train_720p/cfg.yaml +110 -0
  29. patch_tubelet/pre_finetune/pre_finetune_320x240_patch_train_720p/epoch-last.pth +3 -0
  30. patch_tubelet/pre_finetune/pre_finetune_384x270_patch_train_480p/cfg.yaml +110 -0
  31. patch_tubelet/pre_finetune/pre_finetune_384x270_patch_train_480p/epoch-last.pth +3 -0
  32. patch_tubelet/pre_finetune/pre_finetune_384x270_patch_train_720p/cfg.yaml +110 -0
  33. patch_tubelet/pre_finetune/pre_finetune_384x270_patch_train_720p/epoch-last.pth +3 -0
  34. teconerv/320x160_pairs_teco/cfg.yaml +115 -0
  35. teconerv/320x160_pairs_teco/epoch-last.pth +3 -0
  36. teconerv/320x160_pairs_teco_small/cfg.yaml +115 -0
  37. teconerv/320x160_pairs_teco_small/epoch-last.pth +3 -0
  38. teconerv/320x240_pairs_teco/cfg.yaml +115 -0
  39. teconerv/320x240_pairs_teco/epoch-last.pth +3 -0
  40. teconerv/320x240_pairs_teco_train_720p/cfg.yaml +115 -0
  41. teconerv/320x240_pairs_teco_train_720p/epoch-last.pth +3 -0
  42. teconerv/384x270_pairs_teco/cfg.yaml +115 -0
  43. teconerv/384x270_pairs_teco/epoch-last.pth +3 -0
  44. teconerv/384x270_pairs_teco_train_720p/cfg.yaml +115 -0
  45. teconerv/384x270_pairs_teco_train_720p/epoch-last.pth +3 -0
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ LICENSE.txt filter=lfs diff=lfs merge=lfs -text
37
+ README.md filter=lfs diff=lfs merge=lfs -text
38
+ nervenc filter=lfs diff=lfs merge=lfs -text
39
+ patch_tubelet filter=lfs diff=lfs merge=lfs -text
40
+ teconerv filter=lfs diff=lfs merge=lfs -text
LICENSE.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:149d807167937014a4633d3a72ce59e201b01087b08c901257bd17c416481bc5
3
+ size 1071
README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:881c0ec81d89da3e6b8b32c9efd8a46e14a71ee5218de4b7c4908f151c34cb24
3
+ size 1519
nervenc/480p_finetuned_baseline/cfg.yaml ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainer: nerv_enc_trainer
2
+ train_dataset:
3
+ name: vidrec_dataset_clip_sampler_lazy
4
+ args:
5
+ root_path: data/dataset_meta
6
+ split: train
7
+ frame_num: 8
8
+ rand_augment: '1_2_5'
9
+ csv_file: k400_2023_train_cls400_50_480p.js
10
+ cls_vid_num: '400_25'
11
+ crop_size:
12
+ - 480
13
+ - 640
14
+ scale: 1
15
+ aspect_ratio: 1
16
+ rand_flip: 'no'
17
+ clips_per_video: 1
18
+ loader:
19
+ batch_size: 8
20
+ num_workers: 16
21
+ test_dataset:
22
+ name: vidrec_dataset_clip_inference_lazy_uvg
23
+ args:
24
+ root_path: data/dataset_meta
25
+ frame_num: 8
26
+ cls_vid_num: -1_-1
27
+ crop_size:
28
+ - 480
29
+ - 640
30
+ csv_paths:
31
+ uvg: uvg_hd.csv
32
+ frames:
33
+ input: none
34
+ output: none
35
+ loader:
36
+ batch_size: 8
37
+ num_workers: 16
38
+ model:
39
+ name: nerv_enc
40
+ args:
41
+ tokenizer:
42
+ name: vidrec_tokenizer
43
+ args:
44
+ input_size:
45
+ - 480
46
+ - 640
47
+ patch_size: 32
48
+ padding: 0
49
+ frame_num: 8
50
+ eval_frames: none
51
+ img_groups: 1
52
+ hyponet:
53
+ name: hypo_convnets_full_res
54
+ args:
55
+ in_dim: 1
56
+ out_dim: 3
57
+ out_bias: tanh
58
+ strds_h: '5_4_4_3_2'
59
+ strds_w: '5_4_4_4_2'
60
+ ks: '1_3'
61
+ hid_dim: 32
62
+ size: none
63
+ act: gelu
64
+ use_pe: true
65
+ pe_dim: 32
66
+ n_tokens: '32_256_32_24_0'
67
+ token_dims: '200_288_288_288_0'
68
+ transformer_encoder:
69
+ name: transformer_encoder
70
+ args:
71
+ dim: 720
72
+ depth: 6
73
+ n_head: 12
74
+ head_dim: 64
75
+ ff_dim: 2800
76
+ optimizer:
77
+ name: adam
78
+ args:
79
+ lr: 0.0001
80
+ lr_type: step
81
+ max_epoch: 50
82
+ eval_epoch: 200
83
+ vis_epoch: 2000
84
+ dump_ckt: 'no'
85
+ dump_pred: 'no'
86
+ dump_video: 'no'
87
+ generate_from_single_frame: false
88
+ finetune_model: checkpoints/nervenc/pre_finetune/pre_finetune_480p_baseline/epoch-last.pth
89
+ finetune_same_model: true
90
+ env:
91
+ exp_name: nervenc
92
+ save_dir: checkpoints/nervenc/480p_finetuned_baseline
93
+ instance_tag: 480p_finetuned_baseline
94
+ tot_gpus: 1
95
+ cudnn: false
96
+ port: '29600'
97
+ wandb_upload: false
98
+ wandb_exp_name: null
99
+ wandb_run_id: none
100
+ distributed: false
101
+ rank: 0
102
+ world_size: 1
103
+ gpu: null
nervenc/480p_finetuned_baseline/epoch-last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b99efdba84aee0c111efe1ba651de06e342142478b6bdf32a06eb79ed4b9451a
3
+ size 514966594
nervenc/480p_finetuned_baseline_small/cfg.yaml ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainer: nerv_enc_trainer
2
+ train_dataset:
3
+ name: vidrec_dataset_clip_sampler_lazy
4
+ args:
5
+ root_path: data/dataset_meta
6
+ split: train
7
+ frame_num: 8
8
+ rand_augment: '1_2_5'
9
+ csv_file: k400_2023_train_cls400_50_480p.js
10
+ cls_vid_num: '400_25'
11
+ crop_size:
12
+ - 480
13
+ - 640
14
+ scale: 1
15
+ aspect_ratio: 1
16
+ rand_flip: 'no'
17
+ clips_per_video: 1
18
+ loader:
19
+ batch_size: 8
20
+ num_workers: 24
21
+ test_dataset:
22
+ name: vidrec_dataset_clip_inference_lazy_uvg
23
+ args:
24
+ root_path: data/dataset_meta
25
+ frame_num: 8
26
+ cls_vid_num: -1_-1
27
+ crop_size:
28
+ - 480
29
+ - 640
30
+ csv_paths:
31
+ uvg: uvg_hd.csv
32
+ frames:
33
+ input: none
34
+ output: none
35
+ loader:
36
+ batch_size: 8
37
+ num_workers: 24
38
+ model:
39
+ name: nerv_enc
40
+ args:
41
+ tokenizer:
42
+ name: vidrec_tokenizer
43
+ args:
44
+ input_size:
45
+ - 480
46
+ - 640
47
+ patch_size: 32
48
+ padding: 0
49
+ frame_num: 8
50
+ eval_frames: none
51
+ img_groups: 1
52
+ hyponet:
53
+ name: hypo_convnets_full_res
54
+ args:
55
+ in_dim: 1
56
+ out_dim: 3
57
+ out_bias: tanh
58
+ strds_h: '5_4_4_3_2'
59
+ strds_w: '5_4_4_4_2'
60
+ ks: '1_3'
61
+ hid_dim: 20
62
+ size: none
63
+ act: gelu
64
+ use_pe: true
65
+ pe_dim: 20
66
+ n_tokens: '20_160_20_20_0'
67
+ token_dims: '125_120_288_180_0'
68
+ transformer_encoder:
69
+ name: transformer_encoder
70
+ args:
71
+ dim: 720
72
+ depth: 6
73
+ n_head: 12
74
+ head_dim: 64
75
+ ff_dim: 2800
76
+ optimizer:
77
+ name: adam
78
+ args:
79
+ lr: 0.0001
80
+ lr_type: step
81
+ max_epoch: 50
82
+ eval_epoch: 200
83
+ vis_epoch: 2000
84
+ dump_ckt: 'no'
85
+ dump_pred: 'no'
86
+ dump_video: 'no'
87
+ generate_from_single_frame: false
88
+ finetune_model: checkpoints/nervenc/pre_finetune/pre_finetune_480p_baseline_small/epoch-last.pth
89
+ finetune_same_model: true
90
+ env:
91
+ exp_name: nervenc
92
+ save_dir: checkpoints/nervenc/480p_finetuned_baseline_small
93
+ instance_tag: 480p_finetuned_baseline_small
94
+ tot_gpus: 1
95
+ cudnn: false
96
+ port: '29600'
97
+ wandb_upload: false
98
+ wandb_exp_name: null
99
+ wandb_run_id: none
100
+ distributed: false
101
+ rank: 0
102
+ world_size: 1
103
+ gpu: null
nervenc/480p_finetuned_baseline_small/epoch-last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e100335142ea79a40ae1259c0fb25a50512a8a9117313def7d618c52955f07d5
3
+ size 507680130
nervenc/720p_finetuned_baseline/cfg.yaml ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainer: nerv_enc_trainer
2
+ train_dataset:
3
+ name: vidrec_dataset_clip_sampler_lazy
4
+ args:
5
+ root_path: data/dataset_meta
6
+ split: train
7
+ frame_num: 8
8
+ rand_augment: '1_2_5'
9
+ csv_file: k400_2023_train_cls400_50_720p.js
10
+ cls_vid_num: '400_25'
11
+ crop_size:
12
+ - 720
13
+ - 1280
14
+ scale: 1
15
+ aspect_ratio: 1
16
+ rand_flip: 'no'
17
+ clips_per_video: 1
18
+ loader:
19
+ batch_size: 8
20
+ num_workers: 16
21
+ test_dataset:
22
+ name: vidrec_dataset_clip_inference_lazy_uvg
23
+ args:
24
+ root_path: data/dataset_meta
25
+ frame_num: 8
26
+ cls_vid_num: -1_-1
27
+ crop_size:
28
+ - 720
29
+ - 1280
30
+ csv_paths:
31
+ uvg: uvg_hd.csv
32
+ frames:
33
+ input: none
34
+ output: none
35
+ loader:
36
+ batch_size: 8
37
+ num_workers: 16
38
+ model:
39
+ name: nerv_enc
40
+ args:
41
+ tokenizer:
42
+ name: vidrec_tokenizer
43
+ args:
44
+ input_size:
45
+ - 720
46
+ - 1280
47
+ patch_size: 32
48
+ padding: 0
49
+ frame_num: 8
50
+ eval_frames: none
51
+ img_groups: 1
52
+ hyponet:
53
+ name: hypo_convnets_full_res
54
+ args:
55
+ in_dim: 1
56
+ out_dim: 3
57
+ out_bias: tanh
58
+ strds_h: '5_4_4_3_3'
59
+ strds_w: '5_4_4_4_4'
60
+ ks: '1_3'
61
+ hid_dim: 56
62
+ size: none
63
+ act: gelu
64
+ use_pe: true
65
+ pe_dim: 56
66
+ n_tokens: '56_448_112_112_0'
67
+ token_dims: '350_504_224_168_0'
68
+ transformer_encoder:
69
+ name: transformer_encoder
70
+ args:
71
+ dim: 720
72
+ depth: 6
73
+ n_head: 12
74
+ head_dim: 64
75
+ ff_dim: 2800
76
+ optimizer:
77
+ name: adam
78
+ args:
79
+ lr: 0.0001
80
+ lr_type: step
81
+ max_epoch: 50
82
+ eval_epoch: 50
83
+ vis_epoch: 2000
84
+ dump_ckt: 'no'
85
+ dump_pred: 'no'
86
+ dump_video: 'no'
87
+ generate_from_single_frame: false
88
+ finetune_model: checkpoints/nervenc/pre_finetune/pre_finetune_720p_baseline/epoch-last.pth
89
+ finetune_same_model: true
90
+ env:
91
+ exp_name: nervenc
92
+ save_dir: checkpoints/nervenc/720p_finetuned_baseline
93
+ instance_tag: 720p_finetuned_baseline
94
+ tot_gpus: 4
95
+ cudnn: false
96
+ port: '4645'
97
+ wandb_upload: false
98
+ wandb_exp_name: null
99
+ rank: 0
100
+ world_size: 4
101
+ gpu: 0
102
+ distributed: true
103
+ dist_backend: nccl
104
+ wandb_run_id: none
nervenc/720p_finetuned_baseline/epoch-last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eea0687cabbee361487cd9a4c49128ca7be7cfb63c0865d9a3e6fbfededdb42c
3
+ size 570814594
nervenc/pre_finetune/pre_finetune_480p_baseline/cfg.yaml ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainer: nerv_enc_trainer
2
+ train_dataset:
3
+ name: vidrec_dataset_clip_sampler_lazy
4
+ args:
5
+ root_path: data/dataset_meta
6
+ split: train
7
+ frame_num: 8
8
+ rand_augment: '1_2_5'
9
+ csv_file: k400_2023_train_cls400_50_480p.js
10
+ cls_vid_num: '400_25'
11
+ crop_size:
12
+ - 480
13
+ - 640
14
+ scale: 1
15
+ aspect_ratio: 1
16
+ rand_flip: 'no'
17
+ clips_per_video: 1
18
+ loader:
19
+ batch_size: 8
20
+ num_workers: 16
21
+ test_dataset:
22
+ name: vidrec_dataset_clip_inference_lazy_uvg
23
+ args:
24
+ root_path: data/dataset_meta
25
+ frame_num: 8
26
+ cls_vid_num: -1_-1
27
+ crop_size:
28
+ - 480
29
+ - 640
30
+ csv_paths:
31
+ uvg: uvg_hd.csv
32
+ frames:
33
+ input: none
34
+ output: none
35
+ loader:
36
+ batch_size: 8
37
+ num_workers: 16
38
+ model:
39
+ name: nerv_enc
40
+ args:
41
+ tokenizer:
42
+ name: vidrec_tokenizer
43
+ args:
44
+ input_size:
45
+ - 480
46
+ - 640
47
+ patch_size: 32
48
+ padding: 0
49
+ frame_num: 8
50
+ eval_frames: none
51
+ img_groups: 1
52
+ hyponet:
53
+ name: hypo_convnets_full_res
54
+ args:
55
+ in_dim: 1
56
+ out_dim: 3
57
+ out_bias: tanh
58
+ strds_h: '5_4_4_3_2'
59
+ strds_w: '5_4_4_4_2'
60
+ ks: '1_3'
61
+ hid_dim: 32
62
+ size: none
63
+ act: gelu
64
+ use_pe: true
65
+ pe_dim: 32
66
+ n_tokens: '32_256_32_24_0'
67
+ token_dims: '200_288_288_288_0'
68
+ transformer_encoder:
69
+ name: transformer_encoder
70
+ args:
71
+ dim: 720
72
+ depth: 6
73
+ n_head: 12
74
+ head_dim: 64
75
+ ff_dim: 2800
76
+ optimizer:
77
+ name: adam
78
+ args:
79
+ lr: 0.0001
80
+ lr_type: step
81
+ max_epoch: 150
82
+ eval_epoch: 200
83
+ vis_epoch: 2000
84
+ dump_ckt: 'no'
85
+ dump_pred: 'no'
86
+ dump_video: 'no'
87
+ generate_from_single_frame: false
88
+ env:
89
+ exp_name: nervenc
90
+ save_dir: checkpoints/nervenc/pre_finetune/pre_finetune_480p_baseline
91
+ instance_tag: pre_finetune_480p_baseline
92
+ tot_gpus: 1
93
+ cudnn: false
94
+ port: '29600'
95
+ wandb_upload: false
96
+ wandb_exp_name: null
97
+ wandb_run_id: none
98
+ distributed: false
99
+ rank: 0
100
+ world_size: 1
101
+ gpu: null
nervenc/pre_finetune/pre_finetune_480p_baseline/epoch-last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3dfb4c404d2e17ca15cff392a7ba43cb726b5f136c37dce70c6aecffe97ab9f
3
+ size 514966466
nervenc/pre_finetune/pre_finetune_480p_baseline_small/cfg.yaml ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainer: nerv_enc_trainer
2
+ train_dataset:
3
+ name: vidrec_dataset_clip_sampler_lazy
4
+ args:
5
+ root_path: data/dataset_meta
6
+ split: train
7
+ frame_num: 8
8
+ rand_augment: '1_2_5'
9
+ csv_file: k400_2023_train_cls400_50_480p.js
10
+ cls_vid_num: '400_25'
11
+ crop_size:
12
+ - 480
13
+ - 640
14
+ scale: 1
15
+ aspect_ratio: 1
16
+ rand_flip: 'no'
17
+ clips_per_video: 1
18
+ loader:
19
+ batch_size: 8
20
+ num_workers: 24
21
+ test_dataset:
22
+ name: vidrec_dataset_clip_inference_lazy_uvg
23
+ args:
24
+ root_path: data/dataset_meta
25
+ frame_num: 8
26
+ cls_vid_num: -1_-1
27
+ crop_size:
28
+ - 480
29
+ - 640
30
+ csv_paths:
31
+ uvg: uvg_hd.csv
32
+ frames:
33
+ input: none
34
+ output: none
35
+ loader:
36
+ batch_size: 8
37
+ num_workers: 24
38
+ model:
39
+ name: nerv_enc
40
+ args:
41
+ tokenizer:
42
+ name: vidrec_tokenizer
43
+ args:
44
+ input_size:
45
+ - 480
46
+ - 640
47
+ patch_size: 32
48
+ padding: 0
49
+ frame_num: 8
50
+ eval_frames: none
51
+ img_groups: 1
52
+ hyponet:
53
+ name: hypo_convnets_full_res
54
+ args:
55
+ in_dim: 1
56
+ out_dim: 3
57
+ out_bias: tanh
58
+ strds_h: '5_4_4_3_2'
59
+ strds_w: '5_4_4_4_2'
60
+ ks: '1_3'
61
+ hid_dim: 20
62
+ size: none
63
+ act: gelu
64
+ use_pe: true
65
+ pe_dim: 20
66
+ n_tokens: '20_160_20_20_0'
67
+ token_dims: '125_120_288_180_0'
68
+ transformer_encoder:
69
+ name: transformer_encoder
70
+ args:
71
+ dim: 720
72
+ depth: 6
73
+ n_head: 12
74
+ head_dim: 64
75
+ ff_dim: 2800
76
+ optimizer:
77
+ name: adam
78
+ args:
79
+ lr: 0.0001
80
+ lr_type: step
81
+ max_epoch: 150
82
+ eval_epoch: 150
83
+ vis_epoch: 2000
84
+ dump_ckt: 'no'
85
+ dump_pred: 'no'
86
+ dump_video: 'no'
87
+ generate_from_single_frame: false
88
+ env:
89
+ exp_name: nervenc
90
+ save_dir: checkpoints/nervenc/pre_finetune/pre_finetune_480p_baseline_small
91
+ instance_tag: pre_finetune_480p_baseline_small
92
+ tot_gpus: 1
93
+ cudnn: false
94
+ port: '29531'
95
+ wandb_upload: false
96
+ wandb_exp_name: null
97
+ wandb_run_id: none
98
+ distributed: false
99
+ rank: 0
100
+ world_size: 1
101
+ gpu: null
nervenc/pre_finetune/pre_finetune_480p_baseline_small/epoch-last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff513e0f0a4537f91990e11e726ad0c0e2b83a9d280e97e8968999621d889478
3
+ size 507680002
nervenc/pre_finetune/pre_finetune_720p_baseline/cfg.yaml ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainer: nerv_enc_trainer
2
+ train_dataset:
3
+ name: vidrec_dataset_clip_sampler_lazy
4
+ args:
5
+ root_path: data/dataset_meta
6
+ split: train
7
+ frame_num: 8
8
+ rand_augment: '1_2_5'
9
+ csv_file: k400_2023_train_cls400_50_720p.js
10
+ cls_vid_num: '400_25'
11
+ crop_size:
12
+ - 720
13
+ - 1280
14
+ scale: 1
15
+ aspect_ratio: 1
16
+ rand_flip: 'no'
17
+ clips_per_video: 1
18
+ loader:
19
+ batch_size: 4
20
+ num_workers: 16
21
+ test_dataset:
22
+ name: vidrec_dataset_clip_inference_lazy_uvg
23
+ args:
24
+ root_path: data/dataset_meta
25
+ frame_num: 8
26
+ cls_vid_num: -1_-1
27
+ crop_size:
28
+ - 720
29
+ - 1280
30
+ csv_paths:
31
+ uvg: uvg_hd.csv
32
+ frames:
33
+ input: none
34
+ output: none
35
+ loader:
36
+ batch_size: 4
37
+ num_workers: 16
38
+ model:
39
+ name: nerv_enc
40
+ args:
41
+ tokenizer:
42
+ name: vidrec_tokenizer
43
+ args:
44
+ input_size:
45
+ - 720
46
+ - 1280
47
+ patch_size: 32
48
+ padding: 0
49
+ frame_num: 8
50
+ eval_frames: none
51
+ img_groups: 1
52
+ hyponet:
53
+ name: hypo_convnets_full_res
54
+ args:
55
+ in_dim: 1
56
+ out_dim: 3
57
+ out_bias: tanh
58
+ strds_h: '5_4_4_3_3'
59
+ strds_w: '5_4_4_4_4'
60
+ ks: '1_3'
61
+ hid_dim: 56
62
+ size: none
63
+ act: gelu
64
+ use_pe: true
65
+ pe_dim: 56
66
+ n_tokens: '56_448_112_112_0'
67
+ token_dims: '350_504_224_168_0'
68
+ transformer_encoder:
69
+ name: transformer_encoder
70
+ args:
71
+ dim: 720
72
+ depth: 6
73
+ n_head: 12
74
+ head_dim: 64
75
+ ff_dim: 2800
76
+ optimizer:
77
+ name: adam
78
+ args:
79
+ lr: 0.0001
80
+ lr_type: step
81
+ max_epoch: 150
82
+ eval_epoch: 200
83
+ vis_epoch: 2000
84
+ dump_ckt: 'no'
85
+ dump_pred: 'no'
86
+ dump_video: 'no'
87
+ generate_from_single_frame: false
88
+ env:
89
+ exp_name: nervenc
90
+ save_dir: checkpoints/nervenc/pre_finetune/pre_finetune_720p_baseline
91
+ instance_tag: pre_finetune_720p_baseline
92
+ tot_gpus: 1
93
+ cudnn: false
94
+ port: '29600'
95
+ wandb_upload: false
96
+ wandb_exp_name: null
97
+ wandb_run_id: none
98
+ distributed: false
99
+ rank: 0
100
+ world_size: 1
101
+ gpu: null
nervenc/pre_finetune/pre_finetune_720p_baseline/epoch-last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10119e6ad5c1e0b27f2ae99a383913b991a5f15f0422cbb297776e9fd467cc69
3
+ size 570814466
patch_tubelet/320x160_finetuned_patch/cfg.yaml ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainer: nerv_enc_trainer_full_res
2
+ train_dataset:
3
+ name: vidrec_dataset_patch_tubelet_sampler_lazy
4
+ args:
5
+ root_path: data/dataset_meta
6
+ split: train
7
+ frame_num: 8
8
+ rand_augment: 'no'
9
+ csv_file: k400_2023_train_cls400_50_480p.js
10
+ cls_vid_num: '400_25'
11
+ crop_size:
12
+ - 480
13
+ - 640
14
+ tubelet_size:
15
+ - 160
16
+ - 320
17
+ scale: 1
18
+ aspect_ratio: 1
19
+ rand_flip: 'no'
20
+ clips_per_video: 1
21
+ tubelets_per_clip: 1
22
+ loader:
23
+ batch_size: 32
24
+ num_workers: 16
25
+ test_dataset:
26
+ name: vidrec_dataset_patch_tubelet_inference_lazy_uvg
27
+ args:
28
+ root_path: data/dataset_meta
29
+ frame_num: 8
30
+ cls_vid_num: -1_-1
31
+ crop_size:
32
+ - 480
33
+ - 640
34
+ tubelet_size:
35
+ - 160
36
+ - 320
37
+ csv_paths:
38
+ uvg: uvg_hd.csv
39
+ frames:
40
+ input: none
41
+ output: none
42
+ loader:
43
+ batch_size: 32
44
+ num_workers: 16
45
+ model:
46
+ name: nerv_enc_full_res
47
+ args:
48
+ tokenizer:
49
+ name: vidrec_tokenizer
50
+ args:
51
+ input_size:
52
+ - 160
53
+ - 320
54
+ patch_size: 32
55
+ padding: 0
56
+ frame_num: 8
57
+ eval_frames: none
58
+ img_groups: 1
59
+ hyponet:
60
+ name: hypo_convnets_full_res
61
+ args:
62
+ in_dim: 1
63
+ out_dim: 3
64
+ out_bias: tanh
65
+ strds_h: '5_4_4_2'
66
+ strds_w: '5_4_4_4'
67
+ ks: '1_3'
68
+ hid_dim: 14
69
+ size: none
70
+ act: gelu
71
+ use_pe: true
72
+ pe_dim: 14
73
+ n_tokens: '5_56_4_0'
74
+ token_dims: '196_252_196_0'
75
+ transformer_encoder:
76
+ name: transformer_encoder
77
+ args:
78
+ dim: 720
79
+ depth: 6
80
+ n_head: 12
81
+ head_dim: 64
82
+ ff_dim: 2880
83
+ optimizer:
84
+ name: adam
85
+ args:
86
+ lr: 0.0001
87
+ weight_decay: 0.0
88
+ lr_type: step
89
+ max_epoch: 50
90
+ eval_epoch: 50
91
+ vis_epoch: 2000
92
+ dump_ckt: 'no'
93
+ dump_pred: 'no'
94
+ dump_video: 'no'
95
+ generate_from_single_frame: false
96
+ finetune_model: checkpoints/patch_tubelet/pre_finetune/pre_finetune_320x160_patch/epoch-last.pth
97
+ finetune_same_model: true
98
+ env:
99
+ exp_name: patch_tubelet
100
+ save_dir: checkpoints/patch_tubelet/320x160_finetuned_patch
101
+ instance_tag: 320x160_finetuned_patch
102
+ tot_gpus: 4
103
+ cudnn: false
104
+ port: '9503'
105
+ wandb_upload: false
106
+ rank: 0
107
+ world_size: 4
108
+ gpu: 0
109
+ distributed: true
110
+ dist_backend: nccl
111
+ wandb_exp_name: null
112
+ wandb_run_id: none
patch_tubelet/320x160_finetuned_patch/epoch-last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:885d3aae37c82efc6547a924a225202fb6c641461afb3e9acb75a21633834153
3
+ size 495460270
patch_tubelet/320x160_finetuned_patch_small/cfg.yaml ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainer: nerv_enc_trainer_full_res
2
+ train_dataset:
3
+ name: vidrec_dataset_patch_tubelet_sampler_lazy
4
+ args:
5
+ root_path: data/dataset_meta
6
+ split: train
7
+ frame_num: 8
8
+ rand_augment: 'no'
9
+ csv_file: k400_2023_train_cls400_50_480p.js
10
+ cls_vid_num: '400_25'
11
+ crop_size:
12
+ - 480
13
+ - 640
14
+ tubelet_size:
15
+ - 160
16
+ - 320
17
+ scale: 1
18
+ aspect_ratio: 1
19
+ rand_flip: 'no'
20
+ clips_per_video: 1
21
+ tubelets_per_clip: 1
22
+ loader:
23
+ batch_size: 32
24
+ num_workers: 16
25
+ test_dataset:
26
+ name: vidrec_dataset_patch_tubelet_inference_lazy_uvg
27
+ args:
28
+ root_path: data/dataset_meta
29
+ frame_num: 8
30
+ cls_vid_num: -1_-1
31
+ crop_size:
32
+ - 480
33
+ - 640
34
+ tubelet_size:
35
+ - 160
36
+ - 320
37
+ csv_paths:
38
+ uvg: uvg_hd.csv
39
+ frames:
40
+ input: none
41
+ output: none
42
+ loader:
43
+ batch_size: 32
44
+ num_workers: 16
45
+ model:
46
+ name: nerv_enc_full_res
47
+ args:
48
+ tokenizer:
49
+ name: vidrec_tokenizer
50
+ args:
51
+ input_size:
52
+ - 160
53
+ - 320
54
+ patch_size: 32
55
+ padding: 0
56
+ frame_num: 8
57
+ eval_frames: none
58
+ img_groups: 1
59
+ hyponet:
60
+ name: hypo_convnets_full_res
61
+ args:
62
+ in_dim: 1
63
+ out_dim: 3
64
+ out_bias: tanh
65
+ strds_h: '5_4_4_2'
66
+ strds_w: '5_4_4_4'
67
+ ks: '1_3'
68
+ hid_dim: 14
69
+ size: none
70
+ act: gelu
71
+ use_pe: true
72
+ pe_dim: 14
73
+ n_tokens: '5_16_4_0'
74
+ token_dims: '140_252_98_0'
75
+ transformer_encoder:
76
+ name: transformer_encoder
77
+ args:
78
+ dim: 720
79
+ depth: 6
80
+ n_head: 12
81
+ head_dim: 64
82
+ ff_dim: 2880
83
+ optimizer:
84
+ name: adam
85
+ args:
86
+ lr: 0.0001
87
+ weight_decay: 0.0
88
+ lr_type: step
89
+ max_epoch: 50
90
+ eval_epoch: 50
91
+ vis_epoch: 2000
92
+ dump_ckt: 'no'
93
+ dump_pred: 'no'
94
+ dump_video: 'no'
95
+ generate_from_single_frame: false
96
+ finetune_model: checkpoints/patch_tubelet/pre_finetune/pre_finetune_320x160_patch_small/epoch-last.pth
97
+ finetune_same_model: true
98
+ env:
99
+ exp_name: patch_tubelet
100
+ save_dir: checkpoints/patch_tubelet/320x160_finetuned_patch_small
101
+ instance_tag: 320x160_finetuned_patch_small
102
+ tot_gpus: 4
103
+ cudnn: false
104
+ port: '15419'
105
+ wandb_upload: false
106
+ rank: 0
107
+ world_size: 4
108
+ gpu: 0
109
+ distributed: true
110
+ dist_backend: nccl
111
+ wandb_exp_name: null
112
+ wandb_run_id: none
patch_tubelet/320x160_finetuned_patch_small/epoch-last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40204a65e856def367f0d2202d96fdb2f979092d36f4ada67301e161b12d4bc3
3
+ size 493782254
patch_tubelet/320x240_finetuned_patch_train_720p/cfg.yaml ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainer: nerv_enc_trainer_full_res
2
+ train_dataset:
3
+ name: vidrec_dataset_patch_tubelet_sampler_lazy
4
+ args:
5
+ root_path: data/dataset_meta
6
+ split: train
7
+ frame_num: 8
8
+ rand_augment: 'no'
9
+ csv_file: k400_2023_train_cls400_50_720p.js
10
+ cls_vid_num: '400_25'
11
+ crop_size:
12
+ - 720
13
+ - 1280
14
+ tubelet_size:
15
+ - 240
16
+ - 320
17
+ scale: 1
18
+ aspect_ratio: 1
19
+ rand_flip: 'no'
20
+ clips_per_video: 1
21
+ tubelets_per_clip: 1
22
+ loader:
23
+ batch_size: 32
24
+ num_workers: 16
25
+ test_dataset:
26
+ name: vidrec_dataset_patch_tubelet_inference_lazy_uvg
27
+ args:
28
+ root_path: data/dataset_meta
29
+ frame_num: 8
30
+ cls_vid_num: -1_-1
31
+ crop_size:
32
+ - 720
33
+ - 1280
34
+ tubelet_size:
35
+ - 240
36
+ - 320
37
+ csv_paths:
38
+ uvg: uvg_hd.csv
39
+ frames:
40
+ input: none
41
+ output: none
42
+ loader:
43
+ batch_size: 32
44
+ num_workers: 16
45
+ model:
46
+ name: nerv_enc_full_res
47
+ args:
48
+ tokenizer:
49
+ name: vidrec_tokenizer
50
+ args:
51
+ input_size:
52
+ - 240
53
+ - 320
54
+ patch_size: 32
55
+ padding: 0
56
+ frame_num: 8
57
+ eval_frames: none
58
+ img_groups: 1
59
+ hyponet:
60
+ name: hypo_convnets_full_res
61
+ args:
62
+ in_dim: 1
63
+ out_dim: 3
64
+ out_bias: tanh
65
+ strds_h: '5_4_4_3'
66
+ strds_w: '5_4_4_4'
67
+ ks: '1_3'
68
+ hid_dim: 20
69
+ size: none
70
+ act: gelu
71
+ use_pe: true
72
+ pe_dim: 16
73
+ n_tokens: '10_80_16_0'
74
+ token_dims: '200_240_240_0'
75
+ transformer_encoder:
76
+ name: transformer_encoder
77
+ args:
78
+ dim: 720
79
+ depth: 6
80
+ n_head: 12
81
+ head_dim: 64
82
+ ff_dim: 2880
83
+ optimizer:
84
+ name: adam
85
+ args:
86
+ lr: 0.0001
87
+ weight_decay: 0.0
88
+ lr_type: step
89
+ max_epoch: 50
90
+ eval_epoch: 50
91
+ vis_epoch: 2000
92
+ dump_ckt: 'no'
93
+ dump_pred: 'no'
94
+ dump_video: 'no'
95
+ generate_from_single_frame: false
96
+ finetune_model: checkpoints/patch_tubelet/pre_finetune/pre_finetune_320x240_patch_train_720p/epoch-last.pth
97
+ finetune_same_model: true
98
+ env:
99
+ exp_name: patch_tubelet
100
+ save_dir: checkpoints/patch_tubelet/320x240_finetuned_patch_train_720p
101
+ instance_tag: 320x240_finetuned_patch_train_720p
102
+ tot_gpus: 8
103
+ cudnn: false
104
+ port: '15419'
105
+ wandb_upload: false
106
+ rank: 0
107
+ world_size: 8
108
+ gpu: 0
109
+ distributed: true
110
+ dist_backend: nccl
111
+ wandb_exp_name: null
112
+ wandb_run_id: none
patch_tubelet/320x240_finetuned_patch_train_720p/epoch-last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ed7817598226c11e07ea5254e3761aa09c2cfd38594f84e3fb7eae6856767fe
3
+ size 498296558
patch_tubelet/pre_finetune/pre_finetune_320x160_patch/cfg.yaml ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainer: nerv_enc_trainer_full_res
2
+ train_dataset:
3
+ name: vidrec_dataset_patch_tubelet_sampler_lazy
4
+ args:
5
+ root_path: data/dataset_meta
6
+ split: train
7
+ frame_num: 8
8
+ rand_augment: 'no'
9
+ csv_file: k400_2023_train_cls400_50_480p.js
10
+ cls_vid_num: '400_25'
11
+ crop_size:
12
+ - 480
13
+ - 640
14
+ tubelet_size:
15
+ - 160
16
+ - 320
17
+ scale: 1
18
+ aspect_ratio: 1
19
+ rand_flip: 'no'
20
+ clips_per_video: 1
21
+ tubelets_per_clip: 1
22
+ loader:
23
+ batch_size: 32
24
+ num_workers: 16
25
+ test_dataset:
26
+ name: vidrec_dataset_patch_tubelet_inference_lazy_uvg
27
+ args:
28
+ root_path: data/dataset_meta
29
+ frame_num: 8
30
+ cls_vid_num: -1_-1
31
+ crop_size:
32
+ - 480
33
+ - 640
34
+ tubelet_size:
35
+ - 160
36
+ - 320
37
+ csv_paths:
38
+ uvg: uvg_hd.csv
39
+ frames:
40
+ input: none
41
+ output: none
42
+ loader:
43
+ batch_size: 32
44
+ num_workers: 16
45
+ model:
46
+ name: nerv_enc_full_res
47
+ args:
48
+ tokenizer:
49
+ name: vidrec_tokenizer
50
+ args:
51
+ input_size:
52
+ - 160
53
+ - 320
54
+ patch_size: 32
55
+ padding: 0
56
+ frame_num: 8
57
+ eval_frames: none
58
+ img_groups: 1
59
+ hyponet:
60
+ name: hypo_convnets_full_res
61
+ args:
62
+ in_dim: 1
63
+ out_dim: 3
64
+ out_bias: tanh
65
+ strds_h: '5_4_4_2'
66
+ strds_w: '5_4_4_4'
67
+ ks: '1_3'
68
+ hid_dim: 14
69
+ size: none
70
+ act: gelu
71
+ use_pe: true
72
+ pe_dim: 14
73
+ n_tokens: '5_56_4_0'
74
+ token_dims: '196_252_196_0'
75
+ transformer_encoder:
76
+ name: transformer_encoder
77
+ args:
78
+ dim: 720
79
+ depth: 6
80
+ n_head: 12
81
+ head_dim: 64
82
+ ff_dim: 2880
83
+ optimizer:
84
+ name: adam
85
+ args:
86
+ lr: 0.0001
87
+ weight_decay: 0.0
88
+ lr_type: step
89
+ max_epoch: 50
90
+ eval_epoch: 50
91
+ vis_epoch: 2000
92
+ dump_ckt: 'no'
93
+ dump_pred: 'no'
94
+ dump_video: 'no'
95
+ generate_from_single_frame: false
96
+ env:
97
+ exp_name: patch_tubelet
98
+ save_dir: checkpoints/patch_tubelet/pre_finetune/pre_finetune_320x160_patch
99
+ instance_tag: pre_finetune_320x160_patch
100
+ tot_gpus: 4
101
+ cudnn: false
102
+ port: '9503'
103
+ wandb_upload: false
104
+ rank: 0
105
+ world_size: 4
106
+ gpu: 0
107
+ distributed: true
108
+ dist_backend: nccl
109
+ wandb_exp_name: null
110
+ wandb_run_id: none
patch_tubelet/pre_finetune/pre_finetune_320x160_patch/epoch-last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdf8df38940c13fa6a33943cfdd367e9e115c4dea622ddfdb77f4acb3baf6ef0
3
+ size 495460206
patch_tubelet/pre_finetune/pre_finetune_320x160_patch_small/cfg.yaml ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainer: nerv_enc_trainer_full_res
2
+ train_dataset:
3
+ name: vidrec_dataset_patch_tubelet_sampler_lazy
4
+ args:
5
+ root_path: data/dataset_meta
6
+ split: train
7
+ frame_num: 8
8
+ rand_augment: 'no'
9
+ csv_file: k400_2023_train_cls400_50_480p.js
10
+ cls_vid_num: '400_25'
11
+ crop_size:
12
+ - 480
13
+ - 640
14
+ tubelet_size:
15
+ - 160
16
+ - 320
17
+ scale: 1
18
+ aspect_ratio: 1
19
+ rand_flip: 'no'
20
+ clips_per_video: 1
21
+ tubelets_per_clip: 1
22
+ loader:
23
+ batch_size: 32
24
+ num_workers: 16
25
+ test_dataset:
26
+ name: vidrec_dataset_patch_tubelet_inference_lazy_uvg
27
+ args:
28
+ root_path: data/dataset_meta
29
+ frame_num: 8
30
+ cls_vid_num: -1_-1
31
+ crop_size:
32
+ - 480
33
+ - 640
34
+ tubelet_size:
35
+ - 160
36
+ - 320
37
+ csv_paths:
38
+ uvg: uvg_hd.csv
39
+ frames:
40
+ input: none
41
+ output: none
42
+ loader:
43
+ batch_size: 32
44
+ num_workers: 16
45
+ model:
46
+ name: nerv_enc_full_res
47
+ args:
48
+ tokenizer:
49
+ name: vidrec_tokenizer
50
+ args:
51
+ input_size:
52
+ - 160
53
+ - 320
54
+ patch_size: 32
55
+ padding: 0
56
+ frame_num: 8
57
+ eval_frames: none
58
+ img_groups: 1
59
+ hyponet:
60
+ name: hypo_convnets_full_res
61
+ args:
62
+ in_dim: 1
63
+ out_dim: 3
64
+ out_bias: tanh
65
+ strds_h: '5_4_4_2'
66
+ strds_w: '5_4_4_4'
67
+ ks: '1_3'
68
+ hid_dim: 14
69
+ size: none
70
+ act: gelu
71
+ use_pe: true
72
+ pe_dim: 14
73
+ n_tokens: '5_16_4_0'
74
+ token_dims: '140_252_98_0'
75
+ transformer_encoder:
76
+ name: transformer_encoder
77
+ args:
78
+ dim: 720
79
+ depth: 6
80
+ n_head: 12
81
+ head_dim: 64
82
+ ff_dim: 2880
83
+ optimizer:
84
+ name: adam
85
+ args:
86
+ lr: 0.0001
87
+ weight_decay: 0.0
88
+ lr_type: step
89
+ max_epoch: 150
90
+ eval_epoch: 50
91
+ vis_epoch: 2000
92
+ dump_ckt: 'no'
93
+ dump_pred: 'no'
94
+ dump_video: 'no'
95
+ generate_from_single_frame: false
96
+ env:
97
+ exp_name: patch_tubelet
98
+ save_dir: checkpoints/patch_tubelet/pre_finetune/pre_finetune_320x160_patch_small
99
+ instance_tag: pre_finetune_320x160_patch_small
100
+ tot_gpus: 4
101
+ cudnn: false
102
+ port: '15419'
103
+ wandb_upload: false
104
+ rank: 0
105
+ world_size: 4
106
+ gpu: 0
107
+ distributed: true
108
+ dist_backend: nccl
109
+ wandb_exp_name: null
110
+ wandb_run_id: none
patch_tubelet/pre_finetune/pre_finetune_320x160_patch_small/epoch-last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4541bd2866e16cf7b968ff9fca2594bc225f8f6fb822970c4a021060bf2fdc80
3
+ size 493782126
patch_tubelet/pre_finetune/pre_finetune_320x240_patch_train_480p/cfg.yaml ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainer: nerv_enc_trainer_full_res
2
+ train_dataset:
3
+ name: vidrec_dataset_patch_tubelet_sampler_lazy
4
+ args:
5
+ root_path: data/dataset_meta
6
+ split: train
7
+ frame_num: 8
8
+ rand_augment: 'no'
9
+ csv_file: k400_2023_train_cls400_50_480p.js
10
+ cls_vid_num: '400_25'
11
+ crop_size:
12
+ - 480
13
+ - 640
14
+ tubelet_size:
15
+ - 240
16
+ - 320
17
+ scale: 1
18
+ aspect_ratio: 1
19
+ rand_flip: 'no'
20
+ clips_per_video: 1
21
+ tubelets_per_clip: 1
22
+ loader:
23
+ batch_size: 32
24
+ num_workers: 16
25
+ test_dataset:
26
+ name: vidrec_dataset_patch_tubelet_inference_lazy_uvg
27
+ args:
28
+ root_path: data/dataset_meta
29
+ frame_num: 8
30
+ cls_vid_num: -1_-1
31
+ crop_size:
32
+ - 480
33
+ - 640
34
+ tubelet_size:
35
+ - 240
36
+ - 320
37
+ csv_paths:
38
+ uvg: uvg_hd.csv
39
+ frames:
40
+ input: none
41
+ output: none
42
+ loader:
43
+ batch_size: 32
44
+ num_workers: 16
45
+ model:
46
+ name: nerv_enc_full_res
47
+ args:
48
+ tokenizer:
49
+ name: vidrec_tokenizer
50
+ args:
51
+ input_size:
52
+ - 240
53
+ - 320
54
+ patch_size: 32
55
+ padding: 0
56
+ frame_num: 8
57
+ eval_frames: none
58
+ img_groups: 1
59
+ hyponet:
60
+ name: hypo_convnets_full_res
61
+ args:
62
+ in_dim: 1
63
+ out_dim: 3
64
+ out_bias: tanh
65
+ strds_h: '5_4_4_3'
66
+ strds_w: '5_4_4_4'
67
+ ks: '1_3'
68
+ hid_dim: 20
69
+ size: none
70
+ act: gelu
71
+ use_pe: true
72
+ pe_dim: 16
73
+ n_tokens: '10_80_16_0'
74
+ token_dims: '200_240_240_0'
75
+ transformer_encoder:
76
+ name: transformer_encoder
77
+ args:
78
+ dim: 720
79
+ depth: 6
80
+ n_head: 12
81
+ head_dim: 64
82
+ ff_dim: 2880
83
+ optimizer:
84
+ name: adam
85
+ args:
86
+ lr: 0.0001
87
+ weight_decay: 0.0
88
+ lr_type: step
89
+ max_epoch: 150
90
+ eval_epoch: 150
91
+ vis_epoch: 2000
92
+ dump_ckt: 'no'
93
+ dump_pred: 'no'
94
+ dump_video: 'no'
95
+ generate_from_single_frame: false
96
+ env:
97
+ exp_name: patch_tubelet
98
+ save_dir: checkpoints/patch_tubelet/pre_finetune/pre_finetune_320x240_patch_train_480p
99
+ instance_tag: pre_finetune_320x240_patch_train_480p
100
+ tot_gpus: 4
101
+ cudnn: false
102
+ port: '29827'
103
+ wandb_upload: false
104
+ rank: 0
105
+ world_size: 4
106
+ gpu: 0
107
+ distributed: true
108
+ dist_backend: nccl
109
+ wandb_exp_name: null
110
+ wandb_run_id: none
patch_tubelet/pre_finetune/pre_finetune_320x240_patch_train_480p/epoch-last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c00485b39f96c5bdb62bfda1ff5e1bd7c7f4854d0bbc1cb470a0f6deea717ad4
3
+ size 498296430
patch_tubelet/pre_finetune/pre_finetune_320x240_patch_train_720p/cfg.yaml ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainer: nerv_enc_trainer_full_res
2
+ train_dataset:
3
+ name: vidrec_dataset_patch_tubelet_sampler_lazy
4
+ args:
5
+ root_path: data/dataset_meta
6
+ split: train
7
+ frame_num: 8
8
+ rand_augment: 'no'
9
+ csv_file: k400_2023_train_cls400_50_720p.js
10
+ cls_vid_num: '400_25'
11
+ crop_size:
12
+ - 720
13
+ - 1280
14
+ tubelet_size:
15
+ - 240
16
+ - 320
17
+ scale: 1
18
+ aspect_ratio: 1
19
+ rand_flip: 'no'
20
+ clips_per_video: 1
21
+ tubelets_per_clip: 1
22
+ loader:
23
+ batch_size: 32
24
+ num_workers: 16
25
+ test_dataset:
26
+ name: vidrec_dataset_patch_tubelet_inference_lazy_uvg
27
+ args:
28
+ root_path: data/dataset_meta
29
+ frame_num: 8
30
+ cls_vid_num: -1_-1
31
+ crop_size:
32
+ - 720
33
+ - 1280
34
+ tubelet_size:
35
+ - 240
36
+ - 320
37
+ csv_paths:
38
+ uvg_720: uvg_hd_720p.csv
39
+ frames:
40
+ input: none
41
+ output: none
42
+ loader:
43
+ batch_size: 32
44
+ num_workers: 16
45
+ model:
46
+ name: nerv_enc_full_res
47
+ args:
48
+ tokenizer:
49
+ name: vidrec_tokenizer
50
+ args:
51
+ input_size:
52
+ - 240
53
+ - 320
54
+ patch_size: 32
55
+ padding: 0
56
+ frame_num: 8
57
+ eval_frames: none
58
+ img_groups: 1
59
+ hyponet:
60
+ name: hypo_convnets_full_res
61
+ args:
62
+ in_dim: 1
63
+ out_dim: 3
64
+ out_bias: tanh
65
+ strds_h: '5_4_4_3'
66
+ strds_w: '5_4_4_4'
67
+ ks: '1_3'
68
+ hid_dim: 20
69
+ size: none
70
+ act: gelu
71
+ use_pe: true
72
+ pe_dim: 16
73
+ n_tokens: '10_80_16_0'
74
+ token_dims: '200_240_240_0'
75
+ transformer_encoder:
76
+ name: transformer_encoder
77
+ args:
78
+ dim: 720
79
+ depth: 6
80
+ n_head: 12
81
+ head_dim: 64
82
+ ff_dim: 2880
83
+ optimizer:
84
+ name: adam
85
+ args:
86
+ lr: 0.0001
87
+ weight_decay: 0.0
88
+ lr_type: step
89
+ max_epoch: 150
90
+ eval_epoch: 50
91
+ vis_epoch: 2000
92
+ dump_ckt: 'no'
93
+ dump_pred: 'no'
94
+ dump_video: 'no'
95
+ generate_from_single_frame: false
96
+ env:
97
+ exp_name: patch_tubelet
98
+ save_dir: checkpoints/patch_tubelet/pre_finetune/pre_finetune_320x240_patch_train_720p
99
+ instance_tag: pre_finetune_320x240_patch_train_720p
100
+ tot_gpus: 4
101
+ cudnn: false
102
+ port: '15419'
103
+ wandb_upload: false
104
+ rank: 0
105
+ world_size: 4
106
+ gpu: 0
107
+ distributed: true
108
+ dist_backend: nccl
109
+ wandb_exp_name: null
110
+ wandb_run_id: none
patch_tubelet/pre_finetune/pre_finetune_320x240_patch_train_720p/epoch-last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a020901b01b660432d31d7709618a93e93091653855f25f0728c7a5089ae135
3
+ size 498296430
patch_tubelet/pre_finetune/pre_finetune_384x270_patch_train_480p/cfg.yaml ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainer: nerv_enc_trainer_full_res
2
+ train_dataset:
3
+ name: vidrec_dataset_patch_tubelet_sampler_lazy
4
+ args:
5
+ root_path: data/dataset_meta
6
+ split: train
7
+ frame_num: 8
8
+ rand_augment: 'no'
9
+ csv_file: k400_2023_train_cls400_50_480p.js
10
+ cls_vid_num: '400_25'
11
+ crop_size:
12
+ - 480
13
+ - 640
14
+ tubelet_size:
15
+ - 270
16
+ - 384
17
+ scale: 1
18
+ aspect_ratio: 1
19
+ rand_flip: 'no'
20
+ clips_per_video: 1
21
+ tubelets_per_clip: 1
22
+ loader:
23
+ batch_size: 32
24
+ num_workers: 16
25
+ test_dataset:
26
+ name: vidrec_dataset_patch_tubelet_inference_lazy_uvg
27
+ args:
28
+ root_path: data/dataset_meta
29
+ frame_num: 8
30
+ cls_vid_num: -1_-1
31
+ crop_size:
32
+ - 480
33
+ - 640
34
+ tubelet_size:
35
+ - 270
36
+ - 384
37
+ csv_paths:
38
+ uvg: uvg_hd.csv
39
+ frames:
40
+ input: none
41
+ output: none
42
+ loader:
43
+ batch_size: 32
44
+ num_workers: 16
45
+ model:
46
+ name: nerv_enc_full_res
47
+ args:
48
+ tokenizer:
49
+ name: vidrec_tokenizer
50
+ args:
51
+ input_size:
52
+ - 270
53
+ - 384
54
+ patch_size: 32
55
+ padding: 0
56
+ frame_num: 8
57
+ eval_frames: none
58
+ img_groups: 1
59
+ hyponet:
60
+ name: hypo_convnets_full_res
61
+ args:
62
+ in_dim: 1
63
+ out_dim: 3
64
+ out_bias: tanh
65
+ strds_h: '6_5_3_3'
66
+ strds_w: '6_4_4_4'
67
+ ks: '1_3'
68
+ hid_dim: 20
69
+ size: none
70
+ act: gelu
71
+ use_pe: true
72
+ pe_dim: 20
73
+ n_tokens: '16_100_16_0'
74
+ token_dims: '180_240_180_0'
75
+ transformer_encoder:
76
+ name: transformer_encoder
77
+ args:
78
+ dim: 720
79
+ depth: 6
80
+ n_head: 12
81
+ head_dim: 64
82
+ ff_dim: 2880
83
+ optimizer:
84
+ name: adam
85
+ args:
86
+ lr: 0.0001
87
+ weight_decay: 0.0
88
+ lr_type: step
89
+ max_epoch: 150
90
+ eval_epoch: 150
91
+ vis_epoch: 2000
92
+ dump_ckt: 'no'
93
+ dump_pred: 'no'
94
+ dump_video: 'no'
95
+ generate_from_single_frame: false
96
+ env:
97
+ exp_name: patch_tubelet
98
+ save_dir: checkpoints/patch_tubelet/pre_finetune/pre_finetune_384x270_patch_train_480p
99
+ instance_tag: pre_finetune_384x270_patch_train_480p
100
+ tot_gpus: 4
101
+ cudnn: false
102
+ port: '15419'
103
+ wandb_upload: false
104
+ rank: 0
105
+ world_size: 4
106
+ gpu: 0
107
+ distributed: true
108
+ dist_backend: nccl
109
+ wandb_exp_name: null
110
+ wandb_run_id: none
patch_tubelet/pre_finetune/pre_finetune_384x270_patch_train_480p/epoch-last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2301668741892201c35f9710155bac0f13437d26b39096902f17ab39a9f58d68
3
+ size 499705518
patch_tubelet/pre_finetune/pre_finetune_384x270_patch_train_720p/cfg.yaml ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainer: nerv_enc_trainer_full_res
2
+ train_dataset:
3
+ name: vidrec_dataset_patch_tubelet_sampler_lazy
4
+ args:
5
+ root_path: data/dataset_meta
6
+ split: train
7
+ frame_num: 8
8
+ rand_augment: 'no'
9
+ csv_file: k400_2023_train_cls400_50_720p.js
10
+ cls_vid_num: '400_25'
11
+ crop_size:
12
+ - 720
13
+ - 1280
14
+ tubelet_size:
15
+ - 270
16
+ - 384
17
+ scale: 1
18
+ aspect_ratio: 1
19
+ rand_flip: 'no'
20
+ clips_per_video: 1
21
+ tubelets_per_clip: 1
22
+ loader:
23
+ batch_size: 32
24
+ num_workers: 16
25
+ test_dataset:
26
+ name: vidrec_dataset_patch_tubelet_inference_lazy_uvg
27
+ args:
28
+ root_path: data/dataset_meta
29
+ frame_num: 8
30
+ cls_vid_num: -1_-1
31
+ crop_size:
32
+ - 720
33
+ - 1280
34
+ tubelet_size:
35
+ - 270
36
+ - 384
37
+ csv_paths:
38
+ uvg: uvg_hd.csv
39
+ frames:
40
+ input: none
41
+ output: none
42
+ loader:
43
+ batch_size: 32
44
+ num_workers: 16
45
+ model:
46
+ name: nerv_enc_full_res
47
+ args:
48
+ tokenizer:
49
+ name: vidrec_tokenizer
50
+ args:
51
+ input_size:
52
+ - 270
53
+ - 384
54
+ patch_size: 32
55
+ padding: 0
56
+ frame_num: 8
57
+ eval_frames: none
58
+ img_groups: 1
59
+ hyponet:
60
+ name: hypo_convnets_full_res
61
+ args:
62
+ in_dim: 1
63
+ out_dim: 3
64
+ out_bias: tanh
65
+ strds_h: '6_5_3_3'
66
+ strds_w: '6_4_4_4'
67
+ ks: '1_3'
68
+ hid_dim: 20
69
+ size: none
70
+ act: gelu
71
+ use_pe: true
72
+ pe_dim: 20
73
+ n_tokens: '16_100_16_0'
74
+ token_dims: '180_240_180_0'
75
+ transformer_encoder:
76
+ name: transformer_encoder
77
+ args:
78
+ dim: 720
79
+ depth: 6
80
+ n_head: 12
81
+ head_dim: 64
82
+ ff_dim: 2880
83
+ optimizer:
84
+ name: adam
85
+ args:
86
+ lr: 0.0001
87
+ weight_decay: 0.0
88
+ lr_type: step
89
+ max_epoch: 150
90
+ eval_epoch: 150
91
+ vis_epoch: 2000
92
+ dump_ckt: 'no'
93
+ dump_pred: 'no'
94
+ dump_video: 'no'
95
+ generate_from_single_frame: false
96
+ env:
97
+ exp_name: patch_tubelet
98
+ save_dir: checkpoints/patch_tubelet/pre_finetune/pre_finetune_384x270_patch_train_720p
99
+ instance_tag: pre_finetune_384x270_patch_train_720p
100
+ tot_gpus: 4
101
+ cudnn: false
102
+ port: '34306'
103
+ wandb_upload: false
104
+ rank: 0
105
+ world_size: 4
106
+ gpu: 0
107
+ distributed: true
108
+ dist_backend: nccl
109
+ wandb_exp_name: null
110
+ wandb_run_id: none
patch_tubelet/pre_finetune/pre_finetune_384x270_patch_train_720p/epoch-last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c2afa676bd450479b9ffc32a29af06b6ba499255dc98d3a996a1b2899ec0362
3
+ size 499705518
teconerv/320x160_pairs_teco/cfg.yaml ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainer: nerv_enc_trainer_full_res_pairs
2
+ train_dataset:
3
+ name: vidrec_dataset_patch_tubelet_sampler_lazy_pairs
4
+ args:
5
+ root_path: data/dataset_meta
6
+ split: train
7
+ frame_num: 8
8
+ rand_augment: '1_2_5'
9
+ csv_file: k400_2023_train_cls400_50_480p.js
10
+ cls_vid_num: '400_25'
11
+ crop_size:
12
+ - 480
13
+ - 640
14
+ tubelet_size:
15
+ - 160
16
+ - 320
17
+ scale: 1
18
+ aspect_ratio: 1
19
+ rand_flip: 'no'
20
+ clips_per_video: 1
21
+ tubelets_per_clip: 1
22
+ loader:
23
+ batch_size: 32
24
+ num_workers: 16
25
+ test_dataset:
26
+ name: vidrec_dataset_patch_tubelet_inference_lazy_pairs_uvg
27
+ args:
28
+ root_path: data/dataset_meta
29
+ frame_num: 8
30
+ cls_vid_num: -1_-1
31
+ crop_size:
32
+ - 480
33
+ - 640
34
+ tubelet_size:
35
+ - 160
36
+ - 320
37
+ csv_paths:
38
+ uvg: uvg_hd.csv
39
+ frames:
40
+ input: none
41
+ output: none
42
+ loader:
43
+ batch_size: 32
44
+ num_workers: 16
45
+ model:
46
+ name: nerv_enc_full_res_pairs
47
+ args:
48
+ tokenizer:
49
+ name: vidrec_tokenizer
50
+ args:
51
+ input_size:
52
+ - 160
53
+ - 320
54
+ patch_size: 32
55
+ padding: 0
56
+ frame_num: 8
57
+ eval_frames: none
58
+ img_groups: 1
59
+ hyponet:
60
+ name: hypo_convnets_full_res
61
+ args:
62
+ in_dim: 1
63
+ out_dim: 3
64
+ out_bias: tanh
65
+ strds_h: '5_4_4_2'
66
+ strds_w: '5_4_4_4'
67
+ ks: '1_3'
68
+ hid_dim: 14
69
+ size: none
70
+ act: gelu
71
+ use_pe: true
72
+ pe_dim: 14
73
+ n_tokens: '5_56_4_0'
74
+ token_dims: '196_252_196_0'
75
+ transformer_encoder:
76
+ name: transformer_encoder
77
+ args:
78
+ dim: 720
79
+ depth: 6
80
+ n_head: 12
81
+ head_dim: 64
82
+ ff_dim: 2880
83
+ optimizer:
84
+ name: adam
85
+ args:
86
+ lr: 0.0001
87
+ weight_decay: 0.0
88
+ lr_type: step
89
+ max_epoch: 50
90
+ eval_epoch: 50
91
+ vis_epoch: 2000
92
+ dump_ckt: 'no'
93
+ dump_pred: 'no'
94
+ dump_video: 'no'
95
+ generate_from_single_frame: false
96
+ finetune_model: checkpoints/patch_tubelet/pre_finetune/pre_finetune_320x160_patch/epoch-last.pth
97
+ finetune_same_model: false
98
+ param_reg_mode: mod
99
+ param_reg_lambda_l1: 0.1
100
+ param_reg_lambda_l2: 0.0
101
+ env:
102
+ exp_name: teconerv
103
+ save_dir: checkpoints/teconerv/320x160_pairs_teco
104
+ instance_tag: 320x160_pairs_teco
105
+ tot_gpus: 4
106
+ cudnn: false
107
+ port: '15419'
108
+ wandb_upload: false
109
+ rank: 0
110
+ world_size: 4
111
+ gpu: 0
112
+ distributed: true
113
+ dist_backend: nccl
114
+ wandb_exp_name: null
115
+ wandb_run_id: none
teconerv/320x160_pairs_teco/epoch-last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2ed76b258306ec03a2c14ed37d2fd223debdf84ff4589a5e3876ddef8436ff6
3
+ size 495460398
teconerv/320x160_pairs_teco_small/cfg.yaml ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainer: nerv_enc_trainer_full_res_pairs
2
+ train_dataset:
3
+ name: vidrec_dataset_patch_tubelet_sampler_lazy_pairs
4
+ args:
5
+ root_path: data/dataset_meta
6
+ split: train
7
+ frame_num: 8
8
+ rand_augment: '1_2_5'
9
+ csv_file: k400_2023_train_cls400_50_480p.js
10
+ cls_vid_num: '400_25'
11
+ crop_size:
12
+ - 480
13
+ - 640
14
+ tubelet_size:
15
+ - 160
16
+ - 320
17
+ scale: 1
18
+ aspect_ratio: 1
19
+ rand_flip: 'no'
20
+ clips_per_video: 1
21
+ tubelets_per_clip: 1
22
+ loader:
23
+ batch_size: 32
24
+ num_workers: 16
25
+ test_dataset:
26
+ name: vidrec_dataset_patch_tubelet_inference_lazy_pairs_uvg
27
+ args:
28
+ root_path: data/dataset_meta
29
+ frame_num: 8
30
+ cls_vid_num: -1_-1
31
+ crop_size:
32
+ - 480
33
+ - 640
34
+ tubelet_size:
35
+ - 160
36
+ - 320
37
+ csv_paths:
38
+ uvg: uvg_hd.csv
39
+ frames:
40
+ input: none
41
+ output: none
42
+ loader:
43
+ batch_size: 32
44
+ num_workers: 16
45
+ model:
46
+ name: nerv_enc_full_res_pairs
47
+ args:
48
+ tokenizer:
49
+ name: vidrec_tokenizer
50
+ args:
51
+ input_size:
52
+ - 160
53
+ - 320
54
+ patch_size: 32
55
+ padding: 0
56
+ frame_num: 8
57
+ eval_frames: none
58
+ img_groups: 1
59
+ hyponet:
60
+ name: hypo_convnets_full_res
61
+ args:
62
+ in_dim: 1
63
+ out_dim: 3
64
+ out_bias: tanh
65
+ strds_h: '5_4_4_2'
66
+ strds_w: '5_4_4_4'
67
+ ks: '1_3'
68
+ hid_dim: 14
69
+ size: none
70
+ act: gelu
71
+ use_pe: true
72
+ pe_dim: 14
73
+ n_tokens: '5_16_4_0'
74
+ token_dims: '140_252_98_0'
75
+ transformer_encoder:
76
+ name: transformer_encoder
77
+ args:
78
+ dim: 720
79
+ depth: 6
80
+ n_head: 12
81
+ head_dim: 64
82
+ ff_dim: 2880
83
+ optimizer:
84
+ name: adam
85
+ args:
86
+ lr: 0.0001
87
+ weight_decay: 0.0
88
+ lr_type: step
89
+ max_epoch: 50
90
+ eval_epoch: 50
91
+ vis_epoch: 2000
92
+ dump_ckt: 'no'
93
+ dump_pred: 'no'
94
+ dump_video: 'no'
95
+ generate_from_single_frame: false
96
+ finetune_model: checkpoints/patch_tubelet/pre_finetune/pre_finetune_320x160_patch_small/epoch-last.pth
97
+ finetune_same_model: false
98
+ param_reg_mode: mod
99
+ param_reg_lambda_l1: 0.1
100
+ param_reg_lambda_l2: 0.0
101
+ env:
102
+ exp_name: teconerv
103
+ save_dir: checkpoints/teconerv/320x160_pairs_teco_small
104
+ instance_tag: 320x160_pairs_teco_small
105
+ tot_gpus: 4
106
+ cudnn: false
107
+ port: '15419'
108
+ wandb_upload: false
109
+ rank: 0
110
+ world_size: 4
111
+ gpu: 0
112
+ distributed: true
113
+ dist_backend: nccl
114
+ wandb_exp_name: null
115
+ wandb_run_id: none
teconerv/320x160_pairs_teco_small/epoch-last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64b09a04bdbe61ed92046b528cb73d6a72817c8a945c7514d59d29e094ed4627
3
+ size 493782382
teconerv/320x240_pairs_teco/cfg.yaml ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainer: nerv_enc_trainer_full_res_pairs
2
+ train_dataset:
3
+ name: vidrec_dataset_patch_tubelet_sampler_lazy_pairs
4
+ args:
5
+ root_path: data/dataset_meta
6
+ split: train
7
+ frame_num: 8
8
+ rand_augment: '1_2_5'
9
+ csv_file: k400_2023_train_cls400_50_480p.js
10
+ cls_vid_num: '400_25'
11
+ crop_size:
12
+ - 480
13
+ - 640
14
+ tubelet_size:
15
+ - 240
16
+ - 320
17
+ scale: 1
18
+ aspect_ratio: 1
19
+ rand_flip: 'no'
20
+ clips_per_video: 1
21
+ tubelets_per_clip: 1
22
+ loader:
23
+ batch_size: 32
24
+ num_workers: 16
25
+ test_dataset:
26
+ name: vidrec_dataset_patch_tubelet_inference_lazy_pairs_uvg
27
+ args:
28
+ root_path: data/dataset_meta
29
+ frame_num: 8
30
+ cls_vid_num: -1_-1
31
+ crop_size:
32
+ - 480
33
+ - 640
34
+ tubelet_size:
35
+ - 240
36
+ - 320
37
+ csv_paths:
38
+ uvg: uvg_hd.csv
39
+ frames:
40
+ input: none
41
+ output: none
42
+ loader:
43
+ batch_size: 32
44
+ num_workers: 16
45
+ model:
46
+ name: nerv_enc_full_res_pairs
47
+ args:
48
+ tokenizer:
49
+ name: vidrec_tokenizer
50
+ args:
51
+ input_size:
52
+ - 240
53
+ - 320
54
+ patch_size: 32
55
+ padding: 0
56
+ frame_num: 8
57
+ eval_frames: none
58
+ img_groups: 1
59
+ hyponet:
60
+ name: hypo_convnets_full_res
61
+ args:
62
+ in_dim: 1
63
+ out_dim: 3
64
+ out_bias: tanh
65
+ strds_h: '5_4_4_3'
66
+ strds_w: '5_4_4_4'
67
+ ks: '1_3'
68
+ hid_dim: 20
69
+ size: none
70
+ act: gelu
71
+ use_pe: true
72
+ pe_dim: 16
73
+ n_tokens: '10_80_16_0'
74
+ token_dims: '200_240_240_0'
75
+ transformer_encoder:
76
+ name: transformer_encoder
77
+ args:
78
+ dim: 720
79
+ depth: 6
80
+ n_head: 12
81
+ head_dim: 64
82
+ ff_dim: 2880
83
+ optimizer:
84
+ name: adam
85
+ args:
86
+ lr: 0.0001
87
+ weight_decay: 0.0
88
+ lr_type: step
89
+ max_epoch: 50
90
+ eval_epoch: 50
91
+ vis_epoch: 2000
92
+ dump_ckt: 'no'
93
+ dump_pred: 'no'
94
+ dump_video: 'no'
95
+ generate_from_single_frame: false
96
+ finetune_model: checkpoints/patch_tubelet/pre_finetune/pre_finetune_320x240_patch_train_480p/epoch-last.pth
97
+ finetune_same_model: false
98
+ param_reg_mode: mod
99
+ param_reg_lambda_l1: 0.1
100
+ param_reg_lambda_l2: 0.0
101
+ env:
102
+ exp_name: teconerv
103
+ save_dir: checkpoints/teconerv/320x240_pairs_teco
104
+ instance_tag: 320x240_pairs_teco
105
+ tot_gpus: 4
106
+ cudnn: false
107
+ port: '15419'
108
+ wandb_upload: false
109
+ rank: 0
110
+ world_size: 4
111
+ gpu: 0
112
+ distributed: true
113
+ dist_backend: nccl
114
+ wandb_exp_name: null
115
+ wandb_run_id: none
teconerv/320x240_pairs_teco/epoch-last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0508493a47424adb8c317a2cef6adca4294063c6f8c79f487f616e1ff44739fa
3
+ size 498296686
teconerv/320x240_pairs_teco_train_720p/cfg.yaml ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainer: nerv_enc_trainer_full_res_pairs
2
+ train_dataset:
3
+ name: vidrec_dataset_patch_tubelet_sampler_lazy_pairs
4
+ args:
5
+ root_path: data/dataset_meta
6
+ split: train
7
+ frame_num: 8
8
+ rand_augment: '1_2_5'
9
+ csv_file: k400_2023_train_cls400_50_720p.js
10
+ cls_vid_num: '400_25'
11
+ crop_size:
12
+ - 720
13
+ - 1280
14
+ tubelet_size:
15
+ - 240
16
+ - 320
17
+ scale: 1
18
+ aspect_ratio: 1
19
+ rand_flip: 'no'
20
+ clips_per_video: 1
21
+ tubelets_per_clip: 1
22
+ loader:
23
+ batch_size: 32
24
+ num_workers: 16
25
+ test_dataset:
26
+ name: vidrec_dataset_patch_tubelet_inference_lazy_pairs_uvg
27
+ args:
28
+ root_path: data/dataset_meta
29
+ frame_num: 8
30
+ cls_vid_num: -1_-1
31
+ crop_size:
32
+ - 720
33
+ - 1280
34
+ tubelet_size:
35
+ - 240
36
+ - 320
37
+ csv_paths:
38
+ uvg: uvg_hd.csv
39
+ frames:
40
+ input: none
41
+ output: none
42
+ loader:
43
+ batch_size: 32
44
+ num_workers: 16
45
+ model:
46
+ name: nerv_enc_full_res_pairs
47
+ args:
48
+ tokenizer:
49
+ name: vidrec_tokenizer
50
+ args:
51
+ input_size:
52
+ - 240
53
+ - 320
54
+ patch_size: 32
55
+ padding: 0
56
+ frame_num: 8
57
+ eval_frames: none
58
+ img_groups: 1
59
+ hyponet:
60
+ name: hypo_convnets_full_res
61
+ args:
62
+ in_dim: 1
63
+ out_dim: 3
64
+ out_bias: tanh
65
+ strds_h: '5_4_4_3'
66
+ strds_w: '5_4_4_4'
67
+ ks: '1_3'
68
+ hid_dim: 20
69
+ size: none
70
+ act: gelu
71
+ use_pe: true
72
+ pe_dim: 16
73
+ n_tokens: '10_80_16_0'
74
+ token_dims: '200_240_240_0'
75
+ transformer_encoder:
76
+ name: transformer_encoder
77
+ args:
78
+ dim: 720
79
+ depth: 6
80
+ n_head: 12
81
+ head_dim: 64
82
+ ff_dim: 2880
83
+ optimizer:
84
+ name: adam
85
+ args:
86
+ lr: 0.0001
87
+ weight_decay: 0.0
88
+ lr_type: step
89
+ max_epoch: 50
90
+ eval_epoch: 50
91
+ vis_epoch: 2000
92
+ dump_ckt: 'no'
93
+ dump_pred: 'no'
94
+ dump_video: 'no'
95
+ generate_from_single_frame: false
96
+ finetune_model: checkpoints/patch_tubelet/pre_finetune/pre_finetune_320x240_patch_train_720p/epoch-last.pth
97
+ finetune_same_model: false
98
+ param_reg_mode: mod
99
+ param_reg_lambda_l1: 0.1
100
+ param_reg_lambda_l2: 0.0
101
+ env:
102
+ exp_name: teconerv
103
+ save_dir: checkpoints/teconerv/320x240_pairs_teco_train_720p
104
+ instance_tag: 320x240_pairs_teco_train_720p
105
+ tot_gpus: 4
106
+ cudnn: false
107
+ port: '15419'
108
+ wandb_upload: false
109
+ rank: 0
110
+ world_size: 4
111
+ gpu: 0
112
+ distributed: true
113
+ dist_backend: nccl
114
+ wandb_exp_name: null
115
+ wandb_run_id: none
teconerv/320x240_pairs_teco_train_720p/epoch-last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3047d69f34fdaafeb71586eaf9d5c23dab3d540826a5b234f9003775d4c0df00
3
+ size 498296686
teconerv/384x270_pairs_teco/cfg.yaml ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainer: nerv_enc_trainer_full_res_pairs
2
+ train_dataset:
3
+ name: vidrec_dataset_patch_tubelet_sampler_lazy_pairs
4
+ args:
5
+ root_path: data/dataset_meta
6
+ split: train
7
+ frame_num: 8
8
+ rand_augment: '1_2_5'
9
+ csv_file: k400_2023_train_cls400_50_480p.js
10
+ cls_vid_num: '400_25'
11
+ crop_size:
12
+ - 480
13
+ - 640
14
+ tubelet_size:
15
+ - 270
16
+ - 384
17
+ scale: 1
18
+ aspect_ratio: 1
19
+ rand_flip: 'no'
20
+ clips_per_video: 1
21
+ tubelets_per_clip: 1
22
+ loader:
23
+ batch_size: 32
24
+ num_workers: 16
25
+ test_dataset:
26
+ name: vidrec_dataset_patch_tubelet_inference_lazy_pairs_uvg
27
+ args:
28
+ root_path: data/dataset_meta
29
+ frame_num: 8
30
+ cls_vid_num: -1_-1
31
+ crop_size:
32
+ - 480
33
+ - 640
34
+ tubelet_size:
35
+ - 270
36
+ - 384
37
+ csv_paths:
38
+ uvg: uvg_hd.csv
39
+ frames:
40
+ input: none
41
+ output: none
42
+ loader:
43
+ batch_size: 32
44
+ num_workers: 16
45
+ model:
46
+ name: nerv_enc_full_res_pairs
47
+ args:
48
+ tokenizer:
49
+ name: vidrec_tokenizer
50
+ args:
51
+ input_size:
52
+ - 270
53
+ - 384
54
+ patch_size: 32
55
+ padding: 0
56
+ frame_num: 8
57
+ eval_frames: none
58
+ img_groups: 1
59
+ hyponet:
60
+ name: hypo_convnets_full_res
61
+ args:
62
+ in_dim: 1
63
+ out_dim: 3
64
+ out_bias: tanh
65
+ strds_h: '6_5_3_3'
66
+ strds_w: '6_4_4_4'
67
+ ks: '1_3'
68
+ hid_dim: 20
69
+ size: none
70
+ act: gelu
71
+ use_pe: true
72
+ pe_dim: 20
73
+ n_tokens: '16_100_16_0'
74
+ token_dims: '180_240_180_0'
75
+ transformer_encoder:
76
+ name: transformer_encoder
77
+ args:
78
+ dim: 720
79
+ depth: 6
80
+ n_head: 12
81
+ head_dim: 64
82
+ ff_dim: 2880
83
+ optimizer:
84
+ name: adam
85
+ args:
86
+ lr: 0.0001
87
+ weight_decay: 0.0
88
+ lr_type: step
89
+ max_epoch: 50
90
+ eval_epoch: 50
91
+ vis_epoch: 2000
92
+ dump_ckt: 'no'
93
+ dump_pred: 'no'
94
+ dump_video: 'no'
95
+ generate_from_single_frame: false
96
+ finetune_model: checkpoints/patch_tubelet/pre_finetune/pre_finetune_384x270_patch_train_480p/epoch-last.pth
97
+ finetune_same_model: false
98
+ param_reg_mode: mod
99
+ param_reg_lambda_l1: 0.1
100
+ param_reg_lambda_l2: 0.0
101
+ env:
102
+ exp_name: teconerv
103
+ save_dir: checkpoints/teconerv/384x270_pairs_teco
104
+ instance_tag: 384x270_pairs_teco
105
+ tot_gpus: 4
106
+ cudnn: false
107
+ port: '15419'
108
+ wandb_upload: false
109
+ rank: 0
110
+ world_size: 4
111
+ gpu: 0
112
+ distributed: true
113
+ dist_backend: nccl
114
+ wandb_exp_name: null
115
+ wandb_run_id: none
teconerv/384x270_pairs_teco/epoch-last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbdb918f792590202bb56a40b89e684d9cd9a7e04eac73705a91562b5d33d7cb
3
+ size 499705774
teconerv/384x270_pairs_teco_train_720p/cfg.yaml ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ trainer: nerv_enc_trainer_full_res_pairs
2
+ train_dataset:
3
+ name: vidrec_dataset_patch_tubelet_sampler_lazy_pairs
4
+ args:
5
+ root_path: data/dataset_meta
6
+ split: train
7
+ frame_num: 8
8
+ rand_augment: '1_2_5'
9
+ csv_file: k400_2023_train_cls400_50_720p.js
10
+ cls_vid_num: '400_25'
11
+ crop_size:
12
+ - 720
13
+ - 1280
14
+ tubelet_size:
15
+ - 270
16
+ - 384
17
+ scale: 1
18
+ aspect_ratio: 1
19
+ rand_flip: 'no'
20
+ clips_per_video: 1
21
+ tubelets_per_clip: 1
22
+ loader:
23
+ batch_size: 32
24
+ num_workers: 16
25
+ test_dataset:
26
+ name: vidrec_dataset_patch_tubelet_inference_lazy_pairs_uvg
27
+ args:
28
+ root_path: data/dataset_meta
29
+ frame_num: 8
30
+ cls_vid_num: -1_-1
31
+ crop_size:
32
+ - 720
33
+ - 1280
34
+ tubelet_size:
35
+ - 270
36
+ - 384
37
+ csv_paths:
38
+ uvg: uvg_hd.csv
39
+ frames:
40
+ input: none
41
+ output: none
42
+ loader:
43
+ batch_size: 32
44
+ num_workers: 16
45
+ model:
46
+ name: nerv_enc_full_res_pairs
47
+ args:
48
+ tokenizer:
49
+ name: vidrec_tokenizer
50
+ args:
51
+ input_size:
52
+ - 270
53
+ - 384
54
+ patch_size: 32
55
+ padding: 0
56
+ frame_num: 8
57
+ eval_frames: none
58
+ img_groups: 1
59
+ hyponet:
60
+ name: hypo_convnets_full_res
61
+ args:
62
+ in_dim: 1
63
+ out_dim: 3
64
+ out_bias: tanh
65
+ strds_h: '6_5_3_3'
66
+ strds_w: '6_4_4_4'
67
+ ks: '1_3'
68
+ hid_dim: 20
69
+ size: none
70
+ act: gelu
71
+ use_pe: true
72
+ pe_dim: 20
73
+ n_tokens: '16_100_16_0'
74
+ token_dims: '180_240_180_0'
75
+ transformer_encoder:
76
+ name: transformer_encoder
77
+ args:
78
+ dim: 720
79
+ depth: 6
80
+ n_head: 12
81
+ head_dim: 64
82
+ ff_dim: 2880
83
+ optimizer:
84
+ name: adam
85
+ args:
86
+ lr: 0.0001
87
+ weight_decay: 0.0
88
+ lr_type: step
89
+ max_epoch: 50
90
+ eval_epoch: 50
91
+ vis_epoch: 2000
92
+ dump_ckt: 'no'
93
+ dump_pred: 'no'
94
+ dump_video: 'no'
95
+ generate_from_single_frame: false
96
+ finetune_model: checkpoints/patch_tubelet/pre_finetune/pre_finetune_384x270_patch_train_720p/epoch-last.pth
97
+ finetune_same_model: false
98
+ param_reg_mode: mod
99
+ param_reg_lambda_l1: 0.1
100
+ param_reg_lambda_l2: 0.0
101
+ env:
102
+ exp_name: teconerv
103
+ save_dir: checkpoints/teconerv/384x270_pairs_teco_train_720p
104
+ instance_tag: 384x270_pairs_teco_train_720p
105
+ tot_gpus: 4
106
+ cudnn: false
107
+ port: '15419'
108
+ wandb_upload: false
109
+ rank: 0
110
+ world_size: 4
111
+ gpu: 0
112
+ distributed: true
113
+ dist_backend: nccl
114
+ wandb_exp_name: null
115
+ wandb_run_id: none
teconerv/384x270_pairs_teco_train_720p/epoch-last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac262b0a9e57ddf044c31693944a9d679d1aa79e28b850f2c8335f0f5abcb671
3
+ size 499705774