File size: 3,611 Bytes
{
"config": "./config/mmvid_spot_config.yaml",
"resume": null,
"save_model": "./results/spot_2025_08_01_21_32_seed52_ema-1_mmvid/model",
"save_mode": "best",
"res_root_dir": "./results",
"debug": false,
"seed": 52,
"no_cuda": false,
"no_pin_memory": true,
"pretrained_model": "./ckpt/img_size224_layer4_spot_wovisual_beit_softmax/dalle.pt",
"use_fi_frames": false,
"eval": false,
"cuda": true,
"dalle_param": {
"vae": {
"which_vae": "vqgan1024",
"vae_path": "./pretrained_vqgan/spot_epoch=000042.ckpt",
"image_size": 224
},
"bert": {
"num_text_tokens": 0,
"text_seq_len": 24,
"dim": 768,
"loss_img_weight": 7,
"text_feature_dim": 0,
"fixed_language_model": null,
"text_emb_bottleneck": null,
"which_transformer": "openai_clip_visual",
"num_targets": 4,
"num_visuals": 0,
"beit": true,
"use_separate_visual_emb": false,
"insert_sep": false,
"openai_clip_path": "./ckpt/ViT-B-32.pt",
"vision_layers": 4
},
"skip_params": [
"to_logits_vid.1.bias",
"to_logits_vid.1.weight",
"to_logits_vid.0.bias",
"to_logits_vid.0.weight",
"to_logits_rel.1.bias",
"to_logits_rel.1.weight",
"to_logits_rel.0.bias",
"to_logits_rel.0.weight",
"to_logits.1.bias",
"to_logits.1.weight",
"to_logits.0.bias",
"to_logits.0.weight",
"to_logits_text.1.bias",
"to_logits_text.1.weight",
"to_logits_text.0.bias",
"to_logits_text.0.weight",
"image_emb.weight"
],
"freeze": false,
"use_lora": false,
"lora_config": {
"r": 8,
"lora_alpha": 16,
"lora_dropout": 0.1,
"bias": "none"
}
},
"decoder_param": {
"max_n_sen": 12,
"max_t_len": 24,
"max_v_len": 4,
"exp_id": "init",
"hidden_size": 512,
"intermediate_size": 2048,
"num_hidden_layers": 3,
"num_attention_heads": 8,
"mask_prob": 0.0,
"hidden_dropout_prob": 0.1,
"label_smoothing": 0.1,
"recurrent": false,
"untied": false,
"mtrans": true,
"use_beam": false,
"vocab_size": 834,
"mask_token_id": 7
},
"dset_name": "spot",
"data_dir": "/home/sunjiayang/VFI4IDC_test/IDC_scratch_model/densevid_eval/spot_data",
"video_feature_dir": "./data/spot/spot_processed",
"word2idx_path": "./cache/spot_word2idx2.json",
"glove_path": "./cache/yc2_vocab_glove.pt",
"eval_tool_dir": "/home/sunjiayang/VFI4IDC_test/IDC_scratch_model/densevid_eval",
"filtered": true,
"filter_file_path": "./filter_files/spot_similarity_scores.json",
"max_k": 2,
"num_frames": 9,
"recurrent": false,
"untied": false,
"mtrans": true,
"use_beam": false,
"image_size": 224,
"n_epoch": 40,
"batch_size": 16,
"val_batch_size": 64,
"max_es_cnt": 50,
"lr": 5e-05,
"lr_finetune": 2e-05,
"lr_warmup_proportion": 0.1,
"grad_clip": 1,
"weight_decay": 0.01,
"ema_decay": -1,
"num_workers": 32,
"temperature": 0.5,
"res_dir": "./results/spot_2025_08_01_21_32_seed52_ema-1_mmvid",
"log": "./results/spot_2025_08_01_21_32_seed52_ema-1_mmvid/model",
"pin_memory": false
}