File size: 3,611 Bytes
2b2ae2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
{
    "config": "./config/mmvid_spot_config.yaml",
    "resume": null,
    "save_model": "./results/spot_2025_08_01_21_32_seed52_ema-1_mmvid/model",
    "save_mode": "best",
    "res_root_dir": "./results",
    "debug": false,
    "seed": 52,
    "no_cuda": false,
    "no_pin_memory": true,
    "pretrained_model": "./ckpt/img_size224_layer4_spot_wovisual_beit_softmax/dalle.pt",
    "use_fi_frames": false,
    "eval": false,
    "cuda": true,
    "dalle_param": {
        "vae": {
            "which_vae": "vqgan1024",
            "vae_path": "./pretrained_vqgan/spot_epoch=000042.ckpt",
            "image_size": 224
        },
        "bert": {
            "num_text_tokens": 0,
            "text_seq_len": 24,
            "dim": 768,
            "loss_img_weight": 7,
            "text_feature_dim": 0,
            "fixed_language_model": null,
            "text_emb_bottleneck": null,
            "which_transformer": "openai_clip_visual",
            "num_targets": 4,
            "num_visuals": 0,
            "beit": true,
            "use_separate_visual_emb": false,
            "insert_sep": false,
            "openai_clip_path": "./ckpt/ViT-B-32.pt",
            "vision_layers": 4
        },
        "skip_params": [
            "to_logits_vid.1.bias",
            "to_logits_vid.1.weight",
            "to_logits_vid.0.bias",
            "to_logits_vid.0.weight",
            "to_logits_rel.1.bias",
            "to_logits_rel.1.weight",
            "to_logits_rel.0.bias",
            "to_logits_rel.0.weight",
            "to_logits.1.bias",
            "to_logits.1.weight",
            "to_logits.0.bias",
            "to_logits.0.weight",
            "to_logits_text.1.bias",
            "to_logits_text.1.weight",
            "to_logits_text.0.bias",
            "to_logits_text.0.weight",
            "image_emb.weight"
        ],
        "freeze": false,
        "use_lora": false,
        "lora_config": {
            "r": 8,
            "lora_alpha": 16,
            "lora_dropout": 0.1,
            "bias": "none"
        }
    },
    "decoder_param": {
        "max_n_sen": 12,
        "max_t_len": 24,
        "max_v_len": 4,
        "exp_id": "init",
        "hidden_size": 512,
        "intermediate_size": 2048,
        "num_hidden_layers": 3,
        "num_attention_heads": 8,
        "mask_prob": 0.0,
        "hidden_dropout_prob": 0.1,
        "label_smoothing": 0.1,
        "recurrent": false,
        "untied": false,
        "mtrans": true,
        "use_beam": false,
        "vocab_size": 834,
        "mask_token_id": 7
    },
    "dset_name": "spot",
    "data_dir": "/home/sunjiayang/VFI4IDC_test/IDC_scratch_model/densevid_eval/spot_data",
    "video_feature_dir": "./data/spot/spot_processed",
    "word2idx_path": "./cache/spot_word2idx2.json",
    "glove_path": "./cache/yc2_vocab_glove.pt",
    "eval_tool_dir": "/home/sunjiayang/VFI4IDC_test/IDC_scratch_model/densevid_eval",
    "filtered": true,
    "filter_file_path": "./filter_files/spot_similarity_scores.json",
    "max_k": 2,
    "num_frames": 9,
    "recurrent": false,
    "untied": false,
    "mtrans": true,
    "use_beam": false,
    "image_size": 224,
    "n_epoch": 40,
    "batch_size": 16,
    "val_batch_size": 64,
    "max_es_cnt": 50,
    "lr": 5e-05,
    "lr_finetune": 2e-05,
    "lr_warmup_proportion": 0.1,
    "grad_clip": 1,
    "weight_decay": 0.01,
    "ema_decay": -1,
    "num_workers": 32,
    "temperature": 0.5,
    "res_dir": "./results/spot_2025_08_01_21_32_seed52_ema-1_mmvid",
    "log": "./results/spot_2025_08_01_21_32_seed52_ema-1_mmvid/model",
    "pin_memory": false
}