edje / config.json
shahafw's picture
Upload config.json with huggingface_hub
ae9188d verified
{
"flickr_image_root": "/gfs/shared/public/datasets/flickr",
"flickr_ann_root": "/gfs/shared/public/datasets/flickr",
"coco_image_root": "/gfs/shared/public/datasets/albef/coco/images",
"coco_ann_root": "/gfs/shared/public/datasets/albef/json_finetune",
"pretrain_files": [
"/gfs/shared/public/datasets/albef/ccs_synthetic_filtered_large_local_cp.json",
"/gfs/shared/public/datasets/albef/json_pretrain/json_pretrain/vg.json",
"/gfs/shared/public/datasets/albef/json_pretrain/json_pretrain/coco.json"
],
"finetune_files": [
"/gfs/shared/public/datasets/albef/json_pretrain/json_pretrain/coco.json"
],
"zs_dataset": "/gfs/shared/public/datasets/albef/vg_large_objects.json",
"batch_size": 32,
"data": "",
"teacher_siglip_path": "google/siglip2-large-patch16-384",
"siglip_path": "google/siglip2-large-patch16-384",
"language_model_path": "microsoft/MiniLM-L12-H384-uncased",
"multimodal_projection_hidden_dim": 8192,
"num_negatives_per_sample": 3,
"num_hard_negatives": 0,
"num_compressed_tokens": 64,
"weight_decay": 0.05,
"pretraining_lr": 0.0003,
"finetune_lr": 0.0003,
"min_lr": 1e-06,
"warmup_lr": 1e-06,
"lr_decay_rate": 0.9,
"max_epoch": 45,
"warmup_steps": 100,
"k": 10,
"test_set": "flickr",
"imagenet_classnames_path": "/gfs/shared/public/datasets/imagenet/classnames.txt",
"templates_path": "zeroshot_classification_templates.txt",
"experiment": {
"backend": "comet",
"project": "efficient-fusion",
"entity": "mitchellkt",
"run_name": "multimodal-retrieval-training"
},
"dry_mode": false,
"finetune": true,
"init_lr": 0.0003,
"train_file": [
"/gfs/shared/public/datasets/albef/json_pretrain/json_pretrain/coco.json"
]
}